@peerbit/shared-log 12.3.5-484315e → 12.3.5-9b39434

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -53,6 +53,7 @@ import {
53
53
  DataMessage,
54
54
  MessageHeader,
55
55
  NotStartedError,
56
+ type RouteHint,
56
57
  SilentDelivery,
57
58
  } from "@peerbit/stream-interface";
58
59
  import {
@@ -440,6 +441,25 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
440
441
  const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
441
442
 
442
443
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
444
/**
 * How long (ms) a dispatched (target, entry hash) pair is remembered so that the
 * same entry is not re-sent to the same peer while de-duplication is active.
 */
const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
/** Number of index entries requested per iterator batch during a repair sweep. */
const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
/**
 * Default number of entries buffered per target before the buffer is flushed;
 * used when `sync.repairSweepTargetBufferSize` is not provided.
 */
const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
/** Delays (ms) at which a force-fresh repair dispatch is sent and then re-sent. */
const FORCE_FRESH_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
/** Delays (ms) at which a dispatch to a newly added peer is sent and then re-sent. */
const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000];
449
+
450
/**
 * Normalizes an optional numeric option to a positive integer.
 *
 * @param value - Caller-supplied value; `undefined` (or `null`) selects `fallback`.
 * @param fallback - Returned unchanged when `value` is not provided.
 * @param label - Option name used as the prefix of the error message.
 * @returns `fallback` when no value is given, otherwise `value` rounded down and
 * clamped to a minimum of 1.
 * @throws Error when `value` is not finite or is less than or equal to zero.
 */
const toPositiveInteger = (
  value: number | undefined,
  fallback: number,
  label: string,
): number => {
  if (value == null) {
    return fallback;
  }
  if (!Number.isFinite(value) || value <= 0) {
    throw new Error(`${label} must be a positive number`);
  }
  // Fractions in (0, 1) floor to 0, so clamp to keep the result positive.
  return Math.max(1, Math.floor(value));
};
443
463
 
444
464
  const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS: Omit<
445
465
  FanoutTreeChannelOptions,
@@ -474,8 +494,11 @@ export type Args<
474
494
  : "u32",
475
495
  > = LogProperties<T> & LogEvents<T> & SharedLogOptions<T, D, R>;
476
496
 
497
+ export type DeliveryReliability = "ack" | "best-effort";
498
+
477
499
  export type DeliveryOptions = {
478
- settle?: true | { min: number };
500
+ reliability?: DeliveryReliability;
501
+ minAcks?: number;
479
502
  requireRecipients?: boolean;
480
503
  timeout?: number;
481
504
  signal?: AbortSignal;
@@ -647,6 +670,11 @@ export class SharedLog<
647
670
  private replicationChangeDebounceFn!: ReturnType<
648
671
  typeof debounceAggregationChanges<ReplicationRangeIndexable<R>>
649
672
  >;
673
+ private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
674
+ private _recentRepairDispatch!: Map<string, Map<string, number>>;
675
+ private _repairSweepRunning!: boolean;
676
+ private _repairSweepForceFreshPending!: boolean;
677
+ private _repairSweepAddedPeersPending!: Set<string>;
650
678
 
651
679
  // regular distribution checks
652
680
  private distributeQueue?: PQueue;
@@ -663,6 +691,7 @@ export class SharedLog<
663
691
  waitForReplicatorRequestMaxAttempts?: number;
664
692
  waitForPruneDelay!: number;
665
693
  distributionDebounceTime!: number;
694
+ repairSweepTargetBufferSize!: number;
666
695
 
667
696
  replicationController!: PIDReplicationController;
668
697
  history!: { usedMemory: number; factor: number }[];
@@ -873,32 +902,34 @@ export class SharedLog<
873
902
  deliveryArg: false | true | DeliveryOptions | undefined,
874
903
  ): {
875
904
  delivery?: DeliveryOptions;
905
+ reliability: DeliveryReliability;
876
906
  requireRecipients: boolean;
877
- settleMin?: number;
907
+ minAcks?: number;
878
908
  wrap?: (promise: Promise<void>) => Promise<void>;
879
909
  } {
880
910
  const delivery: DeliveryOptions | undefined =
881
911
  deliveryArg === undefined || deliveryArg === false
882
912
  ? undefined
883
913
  : deliveryArg === true
884
- ? {}
914
+ ? { reliability: "ack" }
885
915
  : deliveryArg;
886
916
  if (!delivery) {
887
917
  return {
888
918
  delivery: undefined,
919
+ reliability: "best-effort",
889
920
  requireRecipients: false,
890
- settleMin: undefined,
921
+ minAcks: undefined,
891
922
  wrap: undefined,
892
923
  };
893
924
  }
894
925
 
895
- const deliverySettle = delivery.settle ?? true;
926
+ const reliability: DeliveryReliability = delivery.reliability ?? "ack";
896
927
  const deliveryTimeout = delivery.timeout;
897
928
  const deliverySignal = delivery.signal;
898
929
  const requireRecipients = delivery.requireRecipients === true;
899
- const settleMin =
900
- typeof deliverySettle === "object" && Number.isFinite(deliverySettle.min)
901
- ? Math.max(0, Math.floor(deliverySettle.min))
930
+ const minAcks =
931
+ delivery.minAcks != null && Number.isFinite(delivery.minAcks)
932
+ ? Math.max(0, Math.floor(delivery.minAcks))
902
933
  : undefined;
903
934
 
904
935
  const wrap =
@@ -967,12 +998,107 @@ export class SharedLog<
967
998
 
968
999
  return {
969
1000
  delivery,
1001
+ reliability,
970
1002
  requireRecipients,
971
- settleMin,
1003
+ minAcks,
972
1004
  wrap,
973
1005
  };
974
1006
  }
975
1007
 
1008
/**
 * Asks the pubsub service (when it exposes `getUnifiedRouteHints`) for route hints
 * towards `targetHash` on this log's topic, and returns the unexpired ones ordered
 * from most to least preferred.
 *
 * Ordering: "directstream-ack" hints first; then lower cost (the hint's `distance`
 * for direct hints, otherwise `route.length - 1` floored at 0, with a missing route
 * counting as cost 0); then most recently updated.
 *
 * @param targetHash - Hash of the peer to look up hints for.
 * @returns The sorted hints, or an empty array when the service returns no array.
 */
private async _getSortedRouteHints(
  targetHash: string,
): Promise<RouteHint[]> {
  const pubsub: any = this.node.services.pubsub as any;
  const raw = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
  if (!Array.isArray(raw)) {
    return [];
  }

  const candidates: RouteHint[] = raw;
  const now = Date.now();

  const rankOf = (hint: RouteHint): number =>
    hint.kind === "directstream-ack" ? 0 : 1;
  const costOf = (hint: RouteHint) =>
    hint.kind === "directstream-ack"
      ? hint.distance
      : Math.max(0, (hint.route?.length ?? 1) - 1);

  // `filter` yields a fresh array, so the in-place sort below never touches `raw`.
  const live = candidates.filter(
    (hint) => hint.expiresAt == null || hint.expiresAt > now,
  );
  live.sort((left, right) => {
    const rankDelta = rankOf(left) - rankOf(right);
    if (rankDelta !== 0) {
      return rankDelta;
    }

    const leftCost = costOf(left);
    const rightCost = costOf(right);
    if (leftCost !== rightCost) {
      return leftCost - rightCost;
    }

    // Newest first among otherwise equal hints.
    return (right.updatedAt ?? 0) - (left.updatedAt ?? 0);
  });
  return live;
}
1039
+
1040
/**
 * Delivers `message` to a single peer, trying progressively more general paths
 * and stopping at the first one that succeeds:
 *
 * 1. acknowledged RPC send, only when a "directstream-ack" route hint exists;
 * 2. fanout `unicastAck` along the route of a "fanout-token" hint, when both the
 *    hint and a fanout channel exist;
 * 3. fanout `unicastToAck` addressed by peer hash, when a fanout channel exists;
 * 4. acknowledged RPC send as the last resort.
 *
 * Failures in steps 1-3 are swallowed on purpose so the next path is tried; only a
 * failure of step 4 reaches the caller.
 *
 * @param properties.peer - Hash of the recipient.
 * @param properties.message - Message used for the RPC paths.
 * @param properties.payload - Bytes handed to the fanout paths.
 * @param properties.fanoutUnicastOptions - Timeout/abort options for the fanout paths.
 */
private async _sendAckWithUnifiedHints(properties: {
  peer: string;
  message: ExchangeHeadsMessage<any>;
  payload: Uint8Array;
  fanoutUnicastOptions?: { timeoutMs?: number; signal?: AbortSignal };
}): Promise<void> {
  const { peer, message, payload, fanoutUnicastOptions } = properties;
  const hints = await this._getSortedRouteHints(peer);

  const sendViaRpc = () =>
    this.rpc.send(message, {
      mode: new AcknowledgeDelivery({
        redundancy: 1,
        to: [peer],
      }),
    });

  // Runs one delivery attempt; reports success instead of throwing so the caller
  // can fall through to the next path.
  const succeeded = async (attempt: () => unknown): Promise<boolean> => {
    try {
      await attempt();
      return true;
    } catch {
      return false;
    }
  };

  const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
  const fanoutHint = hints.find(
    (hint): hint is Extract<RouteHint, { kind: "fanout-token" }> =>
      hint.kind === "fanout-token",
  );

  if (hasDirectHint && (await succeeded(sendViaRpc))) {
    return;
  }

  // The channel is re-read before each fanout stage, as it may change across awaits.
  const tokenChannel = this._fanoutChannel;
  if (fanoutHint && tokenChannel) {
    const delivered = await succeeded(() =>
      tokenChannel.unicastAck(fanoutHint.route, payload, fanoutUnicastOptions),
    );
    if (delivered) {
      return;
    }
  }

  const directChannel = this._fanoutChannel;
  if (directChannel) {
    const delivered = await succeeded(() =>
      directChannel.unicastToAck(peer, payload, fanoutUnicastOptions),
    );
    if (delivered) {
      return;
    }
  }

  await sendViaRpc();
}
1101
+
976
1102
  private async _appendDeliverToReplicators(
977
1103
  entry: Entry<T>,
978
1104
  minReplicasValue: number,
@@ -981,7 +1107,7 @@ export class SharedLog<
981
1107
  isLeader: boolean,
982
1108
  deliveryArg: false | true | DeliveryOptions | undefined,
983
1109
  ) {
984
- const { delivery, requireRecipients, settleMin, wrap } =
1110
+ const { delivery, reliability, requireRecipients, minAcks, wrap } =
985
1111
  this._parseDeliveryOptions(deliveryArg);
986
1112
  const pending: Promise<void>[] = [];
987
1113
  const track = (promise: Promise<void>) => {
@@ -997,11 +1123,32 @@ export class SharedLog<
997
1123
  const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
998
1124
 
999
1125
  const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1000
- const hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1126
+ let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1127
+ const allowSubscriberFallback =
1128
+ this.syncronizer instanceof SimpleSyncronizer ||
1129
+ (this.compatibility ?? Number.MAX_VALUE) < 10;
1130
+ if (!hasRemotePeers && allowSubscriberFallback) {
1131
+ try {
1132
+ const subscribers = await this._getTopicSubscribers(this.topic);
1133
+ if (subscribers && subscribers.length > 0) {
1134
+ for (const subscriber of subscribers) {
1135
+ const hash = subscriber.hashcode();
1136
+ if (hash === selfHash) {
1137
+ continue;
1138
+ }
1139
+ set.add(hash);
1140
+ leadersForDelivery?.add(hash);
1141
+ }
1142
+ hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1143
+ }
1144
+ } catch {
1145
+ // Best-effort only; keep discovered recipients as-is.
1146
+ }
1147
+ }
1001
1148
  if (!hasRemotePeers) {
1002
1149
  if (requireRecipients) {
1003
- throw new NoPeersError(this.rpc.topic);
1004
- }
1150
+ throw new NoPeersError(this.rpc.topic);
1151
+ }
1005
1152
  continue;
1006
1153
  }
1007
1154
 
@@ -1037,8 +1184,13 @@ export class SharedLog<
1037
1184
  let silentTo: string[] | undefined;
1038
1185
  // Default delivery semantics: require enough remote ACKs to reach the requested
1039
1186
  // replication degree (local append counts as 1).
1040
- const ackLimit =
1041
- settleMin == null ? Math.max(0, minReplicasValue - 1) : settleMin;
1187
+ const defaultMinAcks = Math.max(0, minReplicasValue - 1);
1188
+ const ackLimitRaw =
1189
+ reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
1190
+ const ackLimit = Math.max(
1191
+ 0,
1192
+ Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length),
1193
+ );
1042
1194
 
1043
1195
  for (const peer of orderedRemoteRecipients) {
1044
1196
  if (ackTo.length < ackLimit) {
@@ -1061,48 +1213,11 @@ export class SharedLog<
1061
1213
  for (const peer of ackTo) {
1062
1214
  track(
1063
1215
  (async () => {
1064
- // Unified decision point:
1065
- // - If we can prove a cheap direct path (connected or routed), use it.
1066
- // - Otherwise, fall back to the fanout unicast ACK path (bounded overlay routing).
1067
- // - If that fails, fall back to pubsub/RPC routing which may flood to discover routes.
1068
- const pubsub: any = this.node.services.pubsub as any;
1069
- const canDirectFast =
1070
- Boolean(pubsub?.peers?.get?.(peer)?.isWritable) ||
1071
- Boolean(
1072
- pubsub?.routes?.isReachable?.(
1073
- pubsub?.publicKeyHash,
1074
- peer,
1075
- 0,
1076
- ),
1077
- );
1078
-
1079
- if (canDirectFast) {
1080
- await this.rpc.send(message, {
1081
- mode: new AcknowledgeDelivery({
1082
- redundancy: 1,
1083
- to: [peer],
1084
- }),
1085
- });
1086
- return;
1087
- }
1088
-
1089
- if (this._fanoutChannel) {
1090
- try {
1091
- await this._fanoutChannel.unicastToAck(
1092
- peer,
1093
- payload,
1094
- fanoutUnicastOptions,
1095
- );
1096
- return;
1097
- } catch {
1098
- // fall back below
1099
- }
1100
- }
1101
- await this.rpc.send(message, {
1102
- mode: new AcknowledgeDelivery({
1103
- redundancy: 1,
1104
- to: [peer],
1105
- }),
1216
+ await this._sendAckWithUnifiedHints({
1217
+ peer,
1218
+ message,
1219
+ payload,
1220
+ fanoutUnicastOptions,
1106
1221
  });
1107
1222
  })(),
1108
1223
  );
@@ -1723,6 +1838,14 @@ export class SharedLog<
1723
1838
  this.pendingMaturity.delete(keyHash);
1724
1839
  }
1725
1840
 
1841
+ // Keep local sync/prune state consistent even when a peer disappears
1842
+ // through replication-info updates without a topic unsubscribe event.
1843
+ this.removePeerFromGidPeerHistory(keyHash);
1844
+ this._recentRepairDispatch.delete(keyHash);
1845
+ if (!isMe) {
1846
+ this.syncronizer.onPeerDisconnected(keyHash);
1847
+ }
1848
+
1726
1849
  if (!isMe) {
1727
1850
  this.rebalanceParticipationDebounced?.call();
1728
1851
  }
@@ -2207,6 +2330,218 @@ export class SharedLog<
2207
2330
  return set;
2208
2331
  }
2209
2332
 
2333
/**
 * Hands a batch of possibly-missing entries for one peer to the synchronizer, with
 * per-target de-duplication and optional delayed re-sends.
 *
 * Every dispatched hash is stamped in `_recentRepairDispatch` under `target`. Unless
 * `bypassRecentDedupe` is set, hashes stamped within the last
 * `RECENT_REPAIR_DISPATCH_TTL_MS` are skipped. The surviving entries are sent once
 * per delay in the retry schedule: delay 0 runs immediately, other delays use timers
 * tracked in `_repairRetryTimers` and are skipped if the log has closed by then.
 *
 * Synchronizer failures, whether thrown synchronously or as a rejected promise, are
 * logged and never propagate to the caller or out of a timer callback.
 *
 * @param target - Hash of the peer to send to.
 * @param entries - Candidate entries keyed by entry hash; not mutated.
 * @param options.bypassRecentDedupe - Send all entries regardless of recent stamps.
 * @param options.retryScheduleMs - Explicit delays in ms; when empty or absent the
 * schedule is `FORCE_FRESH_RETRY_SCHEDULE_MS` for force-fresh delivery, else `[0]`.
 * @param options.forceFreshDelivery - Use the simple hash-based synchronizer when the
 * active one is a `RatelessIBLTSynchronizer`.
 */
private dispatchMaybeMissingEntries(
  target: string,
  entries: Map<string, EntryReplicated<R>>,
  options?: {
    bypassRecentDedupe?: boolean;
    retryScheduleMs?: number[];
    forceFreshDelivery?: boolean;
  },
) {
  if (entries.size === 0) {
    return;
  }

  const now = Date.now();
  let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
  if (!recentlyDispatchedByHash) {
    recentlyDispatchedByHash = new Map();
    this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
  }
  // Expire old stamps first so the bookkeeping map cannot grow without bound.
  for (const [hash, ts] of recentlyDispatchedByHash) {
    if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
      recentlyDispatchedByHash.delete(hash);
    }
  }

  let filteredEntries: Map<string, EntryReplicated<any>>;
  if (options?.bypassRecentDedupe === true) {
    // Send everything, but still stamp it so later de-duplicated calls see it.
    filteredEntries = new Map(entries);
    for (const hash of entries.keys()) {
      recentlyDispatchedByHash.set(hash, now);
    }
  } else {
    filteredEntries = new Map();
    for (const [hash, entry] of entries) {
      const prev = recentlyDispatchedByHash.get(hash);
      if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
        continue;
      }
      recentlyDispatchedByHash.set(hash, now);
      filteredEntries.set(hash, entry);
    }
  }
  if (filteredEntries.size === 0) {
    return;
  }

  const run = async (): Promise<void> => {
    // try/catch (rather than Promise.resolve(...).catch) also covers a synchronous
    // throw, which would otherwise escape from a timer callback uncaught.
    try {
      // For force-fresh churn repair we intentionally bypass rateless IBLT and
      // use simple hash-based sync. This path is a directed "push these hashes
      // to that peer" recovery flow; using simple sync here avoids occasional
      // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
      if (
        options?.forceFreshDelivery &&
        this.syncronizer instanceof RatelessIBLTSynchronizer
      ) {
        await this.syncronizer.simple.onMaybeMissingEntries({
          entries: filteredEntries,
          targets: [target],
        });
      } else {
        await this.syncronizer.onMaybeMissingEntries({
          entries: filteredEntries,
          targets: [target],
        });
      }
    } catch (error: any) {
      logger.error(error);
    }
  };

  const retrySchedule =
    options?.retryScheduleMs && options.retryScheduleMs.length > 0
      ? options.retryScheduleMs
      : options?.forceFreshDelivery
        ? FORCE_FRESH_RETRY_SCHEDULE_MS
        : [0];

  for (const delayMs of retrySchedule) {
    if (delayMs === 0) {
      void run();
      continue;
    }
    const timer = setTimeout(() => {
      this._repairRetryTimers.delete(timer);
      if (this.closed) {
        return;
      }
      void run();
    }, delayMs);
    // In Node.js, an unref'd timer does not keep the event loop alive by itself.
    timer.unref?.();
    this._repairRetryTimers.add(timer);
  }
}
2428
+
2429
/**
 * Records a repair-sweep request and starts the sweep loop if it is idle.
 *
 * Requests are merged into the pending state (`_repairSweepForceFreshPending`,
 * `_repairSweepAddedPeersPending`) that `runRepairSweep` reads. Nothing is started
 * while a sweep is already running or the log is closed.
 *
 * @param options.forceFreshDelivery - Marks the pending sweep as force-fresh.
 * @param options.addedPeers - Peer hashes to add to the pending added-peers set.
 */
private scheduleRepairSweep(options: {
  forceFreshDelivery: boolean;
  addedPeers: Set<string>;
}) {
  const { forceFreshDelivery, addedPeers } = options;

  // Only ever raise the flag here; it is lowered by the sweep when it picks it up.
  if (forceFreshDelivery) {
    this._repairSweepForceFreshPending = true;
  }
  addedPeers.forEach((peer) => {
    this._repairSweepAddedPeersPending.add(peer);
  });

  if (this._repairSweepRunning || this.closed) {
    return;
  }
  this._repairSweepRunning = true;
  void this.runRepairSweep();
}
2444
+
2445
/**
 * Background loop that walks the whole entry-coordinate index and dispatches
 * entries to the peers that should hold them.
 *
 * Each pass takes a snapshot of the pending state and clears it, then:
 * - when the pass is force-fresh, queues every entry for each of its current
 *   leaders except this node;
 * - for each pending added peer, queues the entries that peer currently leads.
 *
 * Entries are buffered per target and flushed through
 * `dispatchMaybeMissingEntries` whenever a buffer reaches
 * `repairSweepTargetBufferSize`, and once more at the end of the pass. The loop
 * repeats while new requests arrive and exits when nothing is pending or the log
 * is closed. Errors other than "not started" are logged.
 */
private async runRepairSweep() {
  try {
    while (!this.closed) {
      const forceFreshDelivery = this._repairSweepForceFreshPending;
      const addedPeers = new Set(this._repairSweepAddedPeersPending);
      this._repairSweepForceFreshPending = false;
      this._repairSweepAddedPeersPending.clear();

      if (!forceFreshDelivery && addedPeers.size === 0) {
        return;
      }

      // Loop-invariant: computed once per pass instead of once per leader per entry.
      const selfHash = this.node.identity.publicKey.hashcode();

      const pendingByTarget = new Map<
        string,
        Map<string, EntryReplicated<any>>
      >();
      const flushTarget = (target: string) => {
        const entries = pendingByTarget.get(target);
        if (!entries || entries.size === 0) {
          return;
        }
        const isJoinWarmupTarget = addedPeers.has(target);
        const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
        this.dispatchMaybeMissingEntries(target, entries, {
          bypassRecentDedupe,
          retryScheduleMs: isJoinWarmupTarget
            ? JOIN_WARMUP_RETRY_SCHEDULE_MS
            : undefined,
          forceFreshDelivery,
        });
        // Drop the buffer; a later entry for this target starts a new one.
        pendingByTarget.delete(target);
      };
      const queueEntryForTarget = (
        target: string,
        entry: EntryReplicated<any>,
      ) => {
        let set = pendingByTarget.get(target);
        if (!set) {
          set = new Map();
          pendingByTarget.set(target, set);
        }
        if (set.has(entry.hash)) {
          return;
        }
        set.set(entry.hash, entry);
        // Bound memory use by flushing as soon as a target's buffer is full.
        if (set.size >= this.repairSweepTargetBufferSize) {
          flushTarget(target);
        }
      };

      const iterator = this.entryCoordinatesIndex.iterate({});
      try {
        while (!this.closed && !iterator.done()) {
          const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
          for (const entry of entries) {
            const entryReplicated = entry.value;
            const currentPeers = await this.findLeaders(
              entryReplicated.coordinates,
              entryReplicated,
              { roleAge: 0 },
            );
            if (forceFreshDelivery) {
              for (const [currentPeer] of currentPeers) {
                if (currentPeer === selfHash) {
                  continue;
                }
                queueEntryForTarget(currentPeer, entryReplicated);
              }
            }
            if (addedPeers.size > 0) {
              for (const peer of addedPeers) {
                if (currentPeers.has(peer)) {
                  queueEntryForTarget(peer, entryReplicated);
                }
              }
            }
          }
        }
      } finally {
        await iterator.close();
      }

      // Flush whatever is left; keys are copied because flushing deletes them.
      for (const target of [...pendingByTarget.keys()]) {
        flushTarget(target);
      }
    }
  } catch (error: any) {
    if (!isNotStartedError(error)) {
      logger.error(`Repair sweep failed: ${error?.message ?? error}`);
    }
  } finally {
    this._repairSweepRunning = false;
    // Requests may still be pending if the loop ended through an error; restart
    // so they are not lost.
    if (
      !this.closed &&
      (this._repairSweepForceFreshPending ||
        this._repairSweepAddedPeersPending.size > 0)
    ) {
      this._repairSweepRunning = true;
      void this.runRepairSweep();
    }
  }
}
2544
+
2210
2545
  private async pruneDebouncedFnAddIfNotKeeping(args: {
2211
2546
  key: string;
2212
2547
  value: {
@@ -2428,10 +2763,15 @@ export class SharedLog<
2428
2763
  this._pendingIHave = new Map();
2429
2764
  this.latestReplicationInfoMessage = new Map();
2430
2765
  this._replicationInfoBlockedPeers = new Set();
2431
- this._replicationInfoRequestByPeer = new Map();
2432
- this._replicationInfoApplyQueueByPeer = new Map();
2433
- this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2434
- this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2766
+ this._replicationInfoRequestByPeer = new Map();
2767
+ this._replicationInfoApplyQueueByPeer = new Map();
2768
+ this._repairRetryTimers = new Set();
2769
+ this._recentRepairDispatch = new Map();
2770
+ this._repairSweepRunning = false;
2771
+ this._repairSweepForceFreshPending = false;
2772
+ this._repairSweepAddedPeersPending = new Set();
2773
+ this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2774
+ this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2435
2775
 
2436
2776
  this.uniqueReplicators = new Set();
2437
2777
  this._replicatorJoinEmitted = new Set();
@@ -2441,6 +2781,11 @@ export class SharedLog<
2441
2781
  this.oldestOpenTime = this.openTime;
2442
2782
  this.distributionDebounceTime =
2443
2783
  options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
2784
+ this.repairSweepTargetBufferSize = toPositiveInteger(
2785
+ options?.sync?.repairSweepTargetBufferSize,
2786
+ REPAIR_SWEEP_TARGET_BUFFER_SIZE,
2787
+ "sync.repairSweepTargetBufferSize",
2788
+ );
2444
2789
 
2445
2790
  this.timeUntilRoleMaturity =
2446
2791
  options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
@@ -3197,6 +3542,14 @@ export class SharedLog<
3197
3542
  "unsubscribe",
3198
3543
  this._onUnsubscriptionFn,
3199
3544
  );
3545
+ for (const timer of this._repairRetryTimers) {
3546
+ clearTimeout(timer);
3547
+ }
3548
+ this._repairRetryTimers.clear();
3549
+ this._recentRepairDispatch.clear();
3550
+ this._repairSweepRunning = false;
3551
+ this._repairSweepForceFreshPending = false;
3552
+ this._repairSweepAddedPeersPending.clear();
3200
3553
 
3201
3554
  for (const [_k, v] of this._pendingDeletes) {
3202
3555
  v.clear();
@@ -3390,7 +3743,6 @@ export class SharedLog<
3390
3743
  if (filteredHeads.length === 0) {
3391
3744
  return;
3392
3745
  }
3393
-
3394
3746
  const groupedByGid = await groupByGid(filteredHeads);
3395
3747
  const promises: Promise<void>[] = [];
3396
3748
 
@@ -4664,8 +5016,8 @@ export class SharedLog<
4664
5016
  const selfHash = this.node.identity.publicKey.hashcode();
4665
5017
 
4666
5018
  // Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
4667
- // Falling back to live pubsub subscribers can include non-replicators and can
4668
- // break delivery/availability when writers are not directly connected.
5019
+ // If it is still warming up (for example, only contains self), supplement with
5020
+ // current subscribers until we have enough candidates for this decision.
4669
5021
  let peerFilter: Set<string> | undefined = undefined;
4670
5022
  const selfReplicating = await this.isReplicating();
4671
5023
  if (this.uniqueReplicators.size > 0) {
@@ -4675,6 +5027,22 @@ export class SharedLog<
4675
5027
  } else {
4676
5028
  peerFilter.delete(selfHash);
4677
5029
  }
5030
+
5031
+ try {
5032
+ const subscribers = await this._getTopicSubscribers(this.topic);
5033
+ if (subscribers && subscribers.length > 0) {
5034
+ for (const subscriber of subscribers) {
5035
+ peerFilter.add(subscriber.hashcode());
5036
+ }
5037
+ if (selfReplicating) {
5038
+ peerFilter.add(selfHash);
5039
+ } else {
5040
+ peerFilter.delete(selfHash);
5041
+ }
5042
+ }
5043
+ } catch {
5044
+ // Best-effort only; keep current peerFilter.
5045
+ }
4678
5046
  } else {
4679
5047
  try {
4680
5048
  const subscribers =
@@ -4825,9 +5193,20 @@ export class SharedLog<
4825
5193
  }
4826
5194
 
4827
5195
  if (!subscribed) {
5196
+ const wasReplicator = this.uniqueReplicators.has(peerHash);
5197
+ try {
5198
+ // Unsubscribe can race with the peer's final replication reset message.
5199
+ // Proactively evict its ranges so leader selection doesn't keep stale owners.
5200
+ await this.removeReplicator(publicKey, { noEvent: true });
5201
+ } catch (error) {
5202
+ if (!isNotStartedError(error as Error)) {
5203
+ throw error;
5204
+ }
5205
+ }
5206
+
4828
5207
  // Emit replicator:leave at most once per (join -> leave) transition, even if we
4829
5208
  // concurrently process unsubscribe + replication reset messages for the same peer.
4830
- const stoppedTransition = this.uniqueReplicators.delete(peerHash);
5209
+ const stoppedTransition = wasReplicator;
4831
5210
  this._replicatorJoinEmitted.delete(peerHash);
4832
5211
 
4833
5212
  this.cancelReplicationInfoRequests(peerHash);
@@ -5317,9 +5696,9 @@ export class SharedLog<
5317
5696
  * that we potentially need to share with other peers
5318
5697
  */
5319
5698
 
5320
- if (this.closed) {
5321
- return;
5322
- }
5699
+ if (this.closed) {
5700
+ return;
5701
+ }
5323
5702
 
5324
5703
  await this.log.trim();
5325
5704
 
@@ -5327,23 +5706,94 @@ export class SharedLog<
5327
5706
  ? (changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>[])
5328
5707
  : [changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>];
5329
5708
  const changes = batchedChanges.flat();
5709
+ const selfHash = this.node.identity.publicKey.hashcode();
5330
5710
  // On removed ranges (peer leaves / shrink), gid-level history can hide
5331
5711
  // per-entry gaps. Force a fresh delivery pass for reassigned entries.
5332
- const forceFreshDelivery = changes.some((change) => change.type === "removed");
5712
+ const forceFreshDelivery = changes.some(
5713
+ (change) => change.type === "removed" && change.range.hash !== selfHash,
5714
+ );
5333
5715
  const gidPeersHistorySnapshot = new Map<string, Set<string> | undefined>();
5716
+ const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
5717
+ for (const [target, hashes] of this._recentRepairDispatch) {
5718
+ for (const [hash, ts] of hashes) {
5719
+ if (ts <= dedupeCutoff) {
5720
+ hashes.delete(hash);
5721
+ }
5722
+ }
5723
+ if (hashes.size === 0) {
5724
+ this._recentRepairDispatch.delete(target);
5725
+ }
5726
+ }
5334
5727
 
5335
5728
  const changed = false;
5729
+ const replacedPeers = new Set<string>();
5730
+ for (const change of changes) {
5731
+ if (change.type === "replaced" && change.range.hash !== selfHash) {
5732
+ replacedPeers.add(change.range.hash);
5733
+ }
5734
+ }
5735
+ const addedPeers = new Set<string>();
5736
+ for (const change of changes) {
5737
+ if (change.type === "added" || change.type === "replaced") {
5738
+ const hash = change.range.hash;
5739
+ if (hash !== selfHash) {
5740
+ // Range updates can reassign entries to an existing peer shortly after it
5741
+ // already received a subset. Avoid suppressing legitimate follow-up repair.
5742
+ this._recentRepairDispatch.delete(hash);
5743
+ }
5744
+ }
5745
+ if (change.type === "added") {
5746
+ const hash = change.range.hash;
5747
+ if (hash !== selfHash && !replacedPeers.has(hash)) {
5748
+ addedPeers.add(hash);
5749
+ }
5750
+ }
5751
+ }
5336
5752
 
5337
5753
  try {
5338
5754
  const uncheckedDeliver: Map<
5339
5755
  string,
5340
5756
  Map<string, EntryReplicated<any>>
5341
5757
  > = new Map();
5758
+ const flushUncheckedDeliverTarget = (target: string) => {
5759
+ const entries = uncheckedDeliver.get(target);
5760
+ if (!entries || entries.size === 0) {
5761
+ return;
5762
+ }
5763
+ const isJoinWarmupTarget = addedPeers.has(target);
5764
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
5765
+ this.dispatchMaybeMissingEntries(target, entries, {
5766
+ bypassRecentDedupe,
5767
+ retryScheduleMs: isJoinWarmupTarget
5768
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
5769
+ : undefined,
5770
+ forceFreshDelivery,
5771
+ });
5772
+ uncheckedDeliver.delete(target);
5773
+ };
5774
+ const queueUncheckedDeliver = (
5775
+ target: string,
5776
+ entry: EntryReplicated<any>,
5777
+ ) => {
5778
+ let set = uncheckedDeliver.get(target);
5779
+ if (!set) {
5780
+ set = new Map();
5781
+ uncheckedDeliver.set(target, set);
5782
+ }
5783
+ if (set.has(entry.hash)) {
5784
+ return;
5785
+ }
5786
+ set.set(entry.hash, entry);
5787
+ if (set.size >= this.repairSweepTargetBufferSize) {
5788
+ flushUncheckedDeliverTarget(target);
5789
+ }
5790
+ };
5342
5791
 
5343
5792
  for await (const entryReplicated of toRebalance<R>(
5344
5793
  changes,
5345
5794
  this.entryCoordinatesIndex,
5346
5795
  this.recentlyRebalanced,
5796
+ { forceFresh: forceFreshDelivery },
5347
5797
  )) {
5348
5798
  if (this.closed) {
5349
5799
  break;
@@ -5371,24 +5821,16 @@ export class SharedLog<
5371
5821
  },
5372
5822
  );
5373
5823
 
5374
- for (const [currentPeer] of currentPeers) {
5375
- if (currentPeer === this.node.identity.publicKey.hashcode()) {
5376
- isLeader = true;
5377
- continue;
5378
- }
5379
-
5380
- if (!oldPeersSet?.has(currentPeer)) {
5381
- let set = uncheckedDeliver.get(currentPeer);
5382
- if (!set) {
5383
- set = new Map();
5384
- uncheckedDeliver.set(currentPeer, set);
5824
+ for (const [currentPeer] of currentPeers) {
5825
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
5826
+ isLeader = true;
5827
+ continue;
5385
5828
  }
5386
5829
 
5387
- if (!set.has(entryReplicated.hash)) {
5388
- set.set(entryReplicated.hash, entryReplicated);
5830
+ if (!oldPeersSet?.has(currentPeer)) {
5831
+ queueUncheckedDeliver(currentPeer, entryReplicated);
5389
5832
  }
5390
5833
  }
5391
- }
5392
5834
 
5393
5835
  if (oldPeersSet) {
5394
5836
  for (const oldPeer of oldPeersSet) {
@@ -5419,11 +5861,15 @@ export class SharedLog<
5419
5861
  this.removePruneRequestSent(entryReplicated.hash);
5420
5862
  }
5421
5863
  }
5422
- for (const [target, entries] of uncheckedDeliver) {
5423
- this.syncronizer.onMaybeMissingEntries({
5424
- entries,
5425
- targets: [target],
5426
- });
5864
+
5865
+ if (forceFreshDelivery || addedPeers.size > 0) {
5866
+ // Schedule a coalesced background sweep for churn/join windows instead of
5867
+ // scanning the whole index synchronously on each replication change.
5868
+ this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5869
+ }
5870
+
5871
+ for (const target of [...uncheckedDeliver.keys()]) {
5872
+ flushUncheckedDeliverTarget(target);
5427
5873
  }
5428
5874
 
5429
5875
  return changed;
@@ -5437,51 +5883,52 @@ export class SharedLog<
5437
5883
  }
5438
5884
  }
5439
5885
 
5440
- async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
5441
- logger.trace(
5442
- `Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(
5443
- evt.detail.topics.map((x) => x),
5444
- )} '`,
5445
- );
5446
- if (!evt.detail.topics.includes(this.topic)) {
5447
- return;
5448
- }
5886
/**
 * Handles a pubsub unsubscribe event. Events that do not include this log's topic
 * are ignored after being traced.
 *
 * For a relevant event the peer is added to `_replicationInfoBlockedPeers`, its
 * recent repair-dispatch bookkeeping is dropped, its replication-info watermark is
 * advanced to the current time, and the change is forwarded to
 * `handleSubscriptionChange` with `subscribed = false`.
 *
 * @param evt - The unsubscribe event carrying the peer key and its topics.
 */
async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
  const { from, topics } = evt.detail;
  logger.trace(
    `Peer disconnected '${from.hashcode()}' from '${JSON.stringify(
      topics.map((x) => x),
    )} '`,
  );
  if (!topics.includes(this.topic)) {
    return;
  }

  const fromHash = from.hashcode();
  this._replicationInfoBlockedPeers.add(fromHash);
  this._recentRepairDispatch.delete(fromHash);

  // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
  // prevents late/out-of-order replication-info messages from re-introducing
  // stale segments for a peer that has already left the topic.
  const observedAt = BigInt(Date.now());
  const previous = this.latestReplicationInfoMessage.get(fromHash);
  const watermarkIsOlder = !previous || previous < observedAt;
  if (watermarkIsOlder) {
    this.latestReplicationInfoMessage.set(fromHash, observedAt);
  }

  return this.handleSubscriptionChange(from, topics, false);
}
5461
5915
 
5462
- return this.handleSubscriptionChange(
5463
- evt.detail.from,
5464
- evt.detail.topics,
5465
- false,
5466
- );
5916
/**
 * Handles a pubsub subscribe event. Events that do not include this log's topic
 * are ignored after being traced.
 *
 * For a relevant event the peer is reported to `remoteBlocks` as reachable, removed
 * from `_replicationInfoBlockedPeers`, and the change is forwarded to
 * `handleSubscriptionChange` with `subscribed = true`. The handler's result is
 * awaited but not returned.
 *
 * @param evt - The subscribe event carrying the peer key and its topics.
 */
async _onSubscription(evt: CustomEvent<SubscriptionEvent>) {
  const { from, topics } = evt.detail;
  logger.trace(
    `New peer '${from.hashcode()}' connected to '${JSON.stringify(
      topics.map((x) => x),
    )}'`,
  );
  if (!topics.includes(this.topic)) {
    return;
  }

  this.remoteBlocks.onReachable(from);
  this._replicationInfoBlockedPeers.delete(from.hashcode());

  await this.handleSubscriptionChange(from, topics, true);
}