@peerbit/shared-log 12.3.5-cb91e7b → 13.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -53,6 +53,7 @@ import {
53
53
  DataMessage,
54
54
  MessageHeader,
55
55
  NotStartedError,
56
+ type RouteHint,
56
57
  SilentDelivery,
57
58
  } from "@peerbit/stream-interface";
58
59
  import {
@@ -440,6 +441,25 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
440
441
  const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
441
442
 
442
443
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
444
+ const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
445
+ const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
446
+ const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
447
+ const FORCE_FRESH_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
448
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000];
449
+
450
+ const toPositiveInteger = (
451
+ value: number | undefined,
452
+ fallback: number,
453
+ label: string,
454
+ ) => {
455
+ if (value == null) {
456
+ return fallback;
457
+ }
458
+ if (!Number.isFinite(value) || value <= 0) {
459
+ throw new Error(`${label} must be a positive number`);
460
+ }
461
+ return Math.max(1, Math.floor(value));
462
+ };
443
463
 
444
464
  const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS: Omit<
445
465
  FanoutTreeChannelOptions,
@@ -474,8 +494,11 @@ export type Args<
474
494
  : "u32",
475
495
  > = LogProperties<T> & LogEvents<T> & SharedLogOptions<T, D, R>;
476
496
 
497
+ export type DeliveryReliability = "ack" | "best-effort";
498
+
477
499
  export type DeliveryOptions = {
478
- settle?: true | { min: number };
500
+ reliability?: DeliveryReliability;
501
+ minAcks?: number;
479
502
  requireRecipients?: boolean;
480
503
  timeout?: number;
481
504
  signal?: AbortSignal;
@@ -647,6 +670,11 @@ export class SharedLog<
647
670
  private replicationChangeDebounceFn!: ReturnType<
648
671
  typeof debounceAggregationChanges<ReplicationRangeIndexable<R>>
649
672
  >;
673
+ private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
674
+ private _recentRepairDispatch!: Map<string, Map<string, number>>;
675
+ private _repairSweepRunning!: boolean;
676
+ private _repairSweepForceFreshPending!: boolean;
677
+ private _repairSweepAddedPeersPending!: Set<string>;
650
678
 
651
679
  // regular distribution checks
652
680
  private distributeQueue?: PQueue;
@@ -663,6 +691,7 @@ export class SharedLog<
663
691
  waitForReplicatorRequestMaxAttempts?: number;
664
692
  waitForPruneDelay!: number;
665
693
  distributionDebounceTime!: number;
694
+ repairSweepTargetBufferSize!: number;
666
695
 
667
696
  replicationController!: PIDReplicationController;
668
697
  history!: { usedMemory: number; factor: number }[];
@@ -873,32 +902,34 @@ export class SharedLog<
873
902
  deliveryArg: false | true | DeliveryOptions | undefined,
874
903
  ): {
875
904
  delivery?: DeliveryOptions;
905
+ reliability: DeliveryReliability;
876
906
  requireRecipients: boolean;
877
- settleMin?: number;
907
+ minAcks?: number;
878
908
  wrap?: (promise: Promise<void>) => Promise<void>;
879
909
  } {
880
910
  const delivery: DeliveryOptions | undefined =
881
911
  deliveryArg === undefined || deliveryArg === false
882
912
  ? undefined
883
913
  : deliveryArg === true
884
- ? {}
914
+ ? { reliability: "ack" }
885
915
  : deliveryArg;
886
916
  if (!delivery) {
887
917
  return {
888
918
  delivery: undefined,
919
+ reliability: "best-effort",
889
920
  requireRecipients: false,
890
- settleMin: undefined,
921
+ minAcks: undefined,
891
922
  wrap: undefined,
892
923
  };
893
924
  }
894
925
 
895
- const deliverySettle = delivery.settle ?? true;
926
+ const reliability: DeliveryReliability = delivery.reliability ?? "ack";
896
927
  const deliveryTimeout = delivery.timeout;
897
928
  const deliverySignal = delivery.signal;
898
929
  const requireRecipients = delivery.requireRecipients === true;
899
- const settleMin =
900
- typeof deliverySettle === "object" && Number.isFinite(deliverySettle.min)
901
- ? Math.max(0, Math.floor(deliverySettle.min))
930
+ const minAcks =
931
+ delivery.minAcks != null && Number.isFinite(delivery.minAcks)
932
+ ? Math.max(0, Math.floor(delivery.minAcks))
902
933
  : undefined;
903
934
 
904
935
  const wrap =
@@ -967,12 +998,107 @@ export class SharedLog<
967
998
 
968
999
  return {
969
1000
  delivery,
1001
+ reliability,
970
1002
  requireRecipients,
971
- settleMin,
1003
+ minAcks,
972
1004
  wrap,
973
1005
  };
974
1006
  }
975
1007
 
1008
+ private async _getSortedRouteHints(
1009
+ targetHash: string,
1010
+ ): Promise<RouteHint[]> {
1011
+ const pubsub: any = this.node.services.pubsub as any;
1012
+ const maybeHints = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
1013
+ const hints: RouteHint[] = Array.isArray(maybeHints) ? maybeHints : [];
1014
+ const now = Date.now();
1015
+ return hints
1016
+ .filter((hint) => hint.expiresAt == null || hint.expiresAt > now)
1017
+ .sort((a, b) => {
1018
+ const rankA = a.kind === "directstream-ack" ? 0 : 1;
1019
+ const rankB = b.kind === "directstream-ack" ? 0 : 1;
1020
+ if (rankA !== rankB) {
1021
+ return rankA - rankB;
1022
+ }
1023
+
1024
+ const costA =
1025
+ a.kind === "directstream-ack"
1026
+ ? a.distance
1027
+ : Math.max(0, (a.route?.length ?? 1) - 1);
1028
+ const costB =
1029
+ b.kind === "directstream-ack"
1030
+ ? b.distance
1031
+ : Math.max(0, (b.route?.length ?? 1) - 1);
1032
+ if (costA !== costB) {
1033
+ return costA - costB;
1034
+ }
1035
+
1036
+ return (b.updatedAt ?? 0) - (a.updatedAt ?? 0);
1037
+ });
1038
+ }
1039
+
1040
+ private async _sendAckWithUnifiedHints(properties: {
1041
+ peer: string;
1042
+ message: ExchangeHeadsMessage<any>;
1043
+ payload: Uint8Array;
1044
+ fanoutUnicastOptions?: { timeoutMs?: number; signal?: AbortSignal };
1045
+ }): Promise<void> {
1046
+ const { peer, message, payload, fanoutUnicastOptions } = properties;
1047
+ const hints = await this._getSortedRouteHints(peer);
1048
+ const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
1049
+ const fanoutHint = hints.find(
1050
+ (hint): hint is Extract<RouteHint, { kind: "fanout-token" }> =>
1051
+ hint.kind === "fanout-token",
1052
+ );
1053
+
1054
+ if (hasDirectHint) {
1055
+ try {
1056
+ await this.rpc.send(message, {
1057
+ mode: new AcknowledgeDelivery({
1058
+ redundancy: 1,
1059
+ to: [peer],
1060
+ }),
1061
+ });
1062
+ return;
1063
+ } catch {
1064
+ // Fall back to fanout token/direct fanout unicast below.
1065
+ }
1066
+ }
1067
+
1068
+ if (fanoutHint && this._fanoutChannel) {
1069
+ try {
1070
+ await this._fanoutChannel.unicastAck(
1071
+ fanoutHint.route,
1072
+ payload,
1073
+ fanoutUnicastOptions,
1074
+ );
1075
+ return;
1076
+ } catch {
1077
+ // Fall back below.
1078
+ }
1079
+ }
1080
+
1081
+ if (this._fanoutChannel) {
1082
+ try {
1083
+ await this._fanoutChannel.unicastToAck(
1084
+ peer,
1085
+ payload,
1086
+ fanoutUnicastOptions,
1087
+ );
1088
+ return;
1089
+ } catch {
1090
+ // Fall back below.
1091
+ }
1092
+ }
1093
+
1094
+ await this.rpc.send(message, {
1095
+ mode: new AcknowledgeDelivery({
1096
+ redundancy: 1,
1097
+ to: [peer],
1098
+ }),
1099
+ });
1100
+ }
1101
+
976
1102
  private async _appendDeliverToReplicators(
977
1103
  entry: Entry<T>,
978
1104
  minReplicasValue: number,
@@ -981,7 +1107,7 @@ export class SharedLog<
981
1107
  isLeader: boolean,
982
1108
  deliveryArg: false | true | DeliveryOptions | undefined,
983
1109
  ) {
984
- const { delivery, requireRecipients, settleMin, wrap } =
1110
+ const { delivery, reliability, requireRecipients, minAcks, wrap } =
985
1111
  this._parseDeliveryOptions(deliveryArg);
986
1112
  const pending: Promise<void>[] = [];
987
1113
  const track = (promise: Promise<void>) => {
@@ -997,11 +1123,32 @@ export class SharedLog<
997
1123
  const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
998
1124
 
999
1125
  const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1000
- const hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1126
+ let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1127
+ const allowSubscriberFallback =
1128
+ this.syncronizer instanceof SimpleSyncronizer ||
1129
+ (this.compatibility ?? Number.MAX_VALUE) < 10;
1130
+ if (!hasRemotePeers && allowSubscriberFallback) {
1131
+ try {
1132
+ const subscribers = await this._getTopicSubscribers(this.topic);
1133
+ if (subscribers && subscribers.length > 0) {
1134
+ for (const subscriber of subscribers) {
1135
+ const hash = subscriber.hashcode();
1136
+ if (hash === selfHash) {
1137
+ continue;
1138
+ }
1139
+ set.add(hash);
1140
+ leadersForDelivery?.add(hash);
1141
+ }
1142
+ hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1143
+ }
1144
+ } catch {
1145
+ // Best-effort only; keep discovered recipients as-is.
1146
+ }
1147
+ }
1001
1148
  if (!hasRemotePeers) {
1002
1149
  if (requireRecipients) {
1003
- throw new NoPeersError(this.rpc.topic);
1004
- }
1150
+ throw new NoPeersError(this.rpc.topic);
1151
+ }
1005
1152
  continue;
1006
1153
  }
1007
1154
 
@@ -1037,8 +1184,13 @@ export class SharedLog<
1037
1184
  let silentTo: string[] | undefined;
1038
1185
  // Default delivery semantics: require enough remote ACKs to reach the requested
1039
1186
  // replication degree (local append counts as 1).
1040
- const ackLimit =
1041
- settleMin == null ? Math.max(0, minReplicasValue - 1) : settleMin;
1187
+ const defaultMinAcks = Math.max(0, minReplicasValue - 1);
1188
+ const ackLimitRaw =
1189
+ reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
1190
+ const ackLimit = Math.max(
1191
+ 0,
1192
+ Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length),
1193
+ );
1042
1194
 
1043
1195
  for (const peer of orderedRemoteRecipients) {
1044
1196
  if (ackTo.length < ackLimit) {
@@ -1061,48 +1213,11 @@ export class SharedLog<
1061
1213
  for (const peer of ackTo) {
1062
1214
  track(
1063
1215
  (async () => {
1064
- // Unified decision point:
1065
- // - If we can prove a cheap direct path (connected or routed), use it.
1066
- // - Otherwise, fall back to the fanout unicast ACK path (bounded overlay routing).
1067
- // - If that fails, fall back to pubsub/RPC routing which may flood to discover routes.
1068
- const pubsub: any = this.node.services.pubsub as any;
1069
- const canDirectFast =
1070
- Boolean(pubsub?.peers?.get?.(peer)?.isWritable) ||
1071
- Boolean(
1072
- pubsub?.routes?.isReachable?.(
1073
- pubsub?.publicKeyHash,
1074
- peer,
1075
- 0,
1076
- ),
1077
- );
1078
-
1079
- if (canDirectFast) {
1080
- await this.rpc.send(message, {
1081
- mode: new AcknowledgeDelivery({
1082
- redundancy: 1,
1083
- to: [peer],
1084
- }),
1085
- });
1086
- return;
1087
- }
1088
-
1089
- if (this._fanoutChannel) {
1090
- try {
1091
- await this._fanoutChannel.unicastToAck(
1092
- peer,
1093
- payload,
1094
- fanoutUnicastOptions,
1095
- );
1096
- return;
1097
- } catch {
1098
- // fall back below
1099
- }
1100
- }
1101
- await this.rpc.send(message, {
1102
- mode: new AcknowledgeDelivery({
1103
- redundancy: 1,
1104
- to: [peer],
1105
- }),
1216
+ await this._sendAckWithUnifiedHints({
1217
+ peer,
1218
+ message,
1219
+ payload,
1220
+ fanoutUnicastOptions,
1106
1221
  });
1107
1222
  })(),
1108
1223
  );
@@ -1723,6 +1838,14 @@ export class SharedLog<
1723
1838
  this.pendingMaturity.delete(keyHash);
1724
1839
  }
1725
1840
 
1841
+ // Keep local sync/prune state consistent even when a peer disappears
1842
+ // through replication-info updates without a topic unsubscribe event.
1843
+ this.removePeerFromGidPeerHistory(keyHash);
1844
+ this._recentRepairDispatch.delete(keyHash);
1845
+ if (!isMe) {
1846
+ this.syncronizer.onPeerDisconnected(keyHash);
1847
+ }
1848
+
1726
1849
  if (!isMe) {
1727
1850
  this.rebalanceParticipationDebounced?.call();
1728
1851
  }
@@ -2207,6 +2330,218 @@ export class SharedLog<
2207
2330
  return set;
2208
2331
  }
2209
2332
 
2333
+ private dispatchMaybeMissingEntries(
2334
+ target: string,
2335
+ entries: Map<string, EntryReplicated<R>>,
2336
+ options?: {
2337
+ bypassRecentDedupe?: boolean;
2338
+ retryScheduleMs?: number[];
2339
+ forceFreshDelivery?: boolean;
2340
+ },
2341
+ ) {
2342
+ if (entries.size === 0) {
2343
+ return;
2344
+ }
2345
+
2346
+ const now = Date.now();
2347
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2348
+ if (!recentlyDispatchedByHash) {
2349
+ recentlyDispatchedByHash = new Map();
2350
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2351
+ }
2352
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2353
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2354
+ recentlyDispatchedByHash.delete(hash);
2355
+ }
2356
+ }
2357
+
2358
+ const filteredEntries =
2359
+ options?.bypassRecentDedupe === true
2360
+ ? new Map(entries)
2361
+ : new Map<string, EntryReplicated<any>>();
2362
+ if (options?.bypassRecentDedupe !== true) {
2363
+ for (const [hash, entry] of entries) {
2364
+ const prev = recentlyDispatchedByHash.get(hash);
2365
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2366
+ continue;
2367
+ }
2368
+ recentlyDispatchedByHash.set(hash, now);
2369
+ filteredEntries.set(hash, entry);
2370
+ }
2371
+ } else {
2372
+ for (const hash of entries.keys()) {
2373
+ recentlyDispatchedByHash.set(hash, now);
2374
+ }
2375
+ }
2376
+ if (filteredEntries.size === 0) {
2377
+ return;
2378
+ }
2379
+
2380
+ const run = () => {
2381
+ // For force-fresh churn repair we intentionally bypass rateless IBLT and
2382
+ // use simple hash-based sync. This path is a directed "push these hashes
2383
+ // to that peer" recovery flow; using simple sync here avoids occasional
2384
+ // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
2385
+ if (
2386
+ options?.forceFreshDelivery &&
2387
+ this.syncronizer instanceof RatelessIBLTSynchronizer
2388
+ ) {
2389
+ return Promise.resolve(
2390
+ this.syncronizer.simple.onMaybeMissingEntries({
2391
+ entries: filteredEntries,
2392
+ targets: [target],
2393
+ }),
2394
+ ).catch((error: any) => logger.error(error));
2395
+ }
2396
+
2397
+ return Promise.resolve(
2398
+ this.syncronizer.onMaybeMissingEntries({
2399
+ entries: filteredEntries,
2400
+ targets: [target],
2401
+ }),
2402
+ ).catch((error: any) => logger.error(error));
2403
+ };
2404
+
2405
+ const retrySchedule =
2406
+ options?.retryScheduleMs && options.retryScheduleMs.length > 0
2407
+ ? options.retryScheduleMs
2408
+ : options?.forceFreshDelivery
2409
+ ? FORCE_FRESH_RETRY_SCHEDULE_MS
2410
+ : [0];
2411
+
2412
+ for (const delayMs of retrySchedule) {
2413
+ if (delayMs === 0) {
2414
+ void run();
2415
+ continue;
2416
+ }
2417
+ const timer = setTimeout(() => {
2418
+ this._repairRetryTimers.delete(timer);
2419
+ if (this.closed) {
2420
+ return;
2421
+ }
2422
+ void run();
2423
+ }, delayMs);
2424
+ timer.unref?.();
2425
+ this._repairRetryTimers.add(timer);
2426
+ }
2427
+ }
2428
+
2429
+ private scheduleRepairSweep(options: {
2430
+ forceFreshDelivery: boolean;
2431
+ addedPeers: Set<string>;
2432
+ }) {
2433
+ if (options.forceFreshDelivery) {
2434
+ this._repairSweepForceFreshPending = true;
2435
+ }
2436
+ for (const peer of options.addedPeers) {
2437
+ this._repairSweepAddedPeersPending.add(peer);
2438
+ }
2439
+ if (!this._repairSweepRunning && !this.closed) {
2440
+ this._repairSweepRunning = true;
2441
+ void this.runRepairSweep();
2442
+ }
2443
+ }
2444
+
2445
+ private async runRepairSweep() {
2446
+ try {
2447
+ while (!this.closed) {
2448
+ const forceFreshDelivery = this._repairSweepForceFreshPending;
2449
+ const addedPeers = new Set(this._repairSweepAddedPeersPending);
2450
+ this._repairSweepForceFreshPending = false;
2451
+ this._repairSweepAddedPeersPending.clear();
2452
+
2453
+ if (!forceFreshDelivery && addedPeers.size === 0) {
2454
+ return;
2455
+ }
2456
+
2457
+ const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
2458
+ const flushTarget = (target: string) => {
2459
+ const entries = pendingByTarget.get(target);
2460
+ if (!entries || entries.size === 0) {
2461
+ return;
2462
+ }
2463
+ const isJoinWarmupTarget = addedPeers.has(target);
2464
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
2465
+ this.dispatchMaybeMissingEntries(target, entries, {
2466
+ bypassRecentDedupe,
2467
+ retryScheduleMs: isJoinWarmupTarget
2468
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
2469
+ : undefined,
2470
+ forceFreshDelivery,
2471
+ });
2472
+ pendingByTarget.delete(target);
2473
+ };
2474
+ const queueEntryForTarget = (
2475
+ target: string,
2476
+ entry: EntryReplicated<any>,
2477
+ ) => {
2478
+ let set = pendingByTarget.get(target);
2479
+ if (!set) {
2480
+ set = new Map();
2481
+ pendingByTarget.set(target, set);
2482
+ }
2483
+ if (set.has(entry.hash)) {
2484
+ return;
2485
+ }
2486
+ set.set(entry.hash, entry);
2487
+ if (set.size >= this.repairSweepTargetBufferSize) {
2488
+ flushTarget(target);
2489
+ }
2490
+ };
2491
+
2492
+ const iterator = this.entryCoordinatesIndex.iterate({});
2493
+ try {
2494
+ while (!this.closed && !iterator.done()) {
2495
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2496
+ for (const entry of entries) {
2497
+ const entryReplicated = entry.value;
2498
+ const currentPeers = await this.findLeaders(
2499
+ entryReplicated.coordinates,
2500
+ entryReplicated,
2501
+ { roleAge: 0 },
2502
+ );
2503
+ if (forceFreshDelivery) {
2504
+ for (const [currentPeer] of currentPeers) {
2505
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
2506
+ continue;
2507
+ }
2508
+ queueEntryForTarget(currentPeer, entryReplicated);
2509
+ }
2510
+ }
2511
+ if (addedPeers.size > 0) {
2512
+ for (const peer of addedPeers) {
2513
+ if (currentPeers.has(peer)) {
2514
+ queueEntryForTarget(peer, entryReplicated);
2515
+ }
2516
+ }
2517
+ }
2518
+ }
2519
+ }
2520
+ } finally {
2521
+ await iterator.close();
2522
+ }
2523
+
2524
+ for (const target of [...pendingByTarget.keys()]) {
2525
+ flushTarget(target);
2526
+ }
2527
+ }
2528
+ } catch (error: any) {
2529
+ if (!isNotStartedError(error)) {
2530
+ logger.error(`Repair sweep failed: ${error?.message ?? error}`);
2531
+ }
2532
+ } finally {
2533
+ this._repairSweepRunning = false;
2534
+ if (
2535
+ !this.closed &&
2536
+ (this._repairSweepForceFreshPending ||
2537
+ this._repairSweepAddedPeersPending.size > 0)
2538
+ ) {
2539
+ this._repairSweepRunning = true;
2540
+ void this.runRepairSweep();
2541
+ }
2542
+ }
2543
+ }
2544
+
2210
2545
  private async pruneDebouncedFnAddIfNotKeeping(args: {
2211
2546
  key: string;
2212
2547
  value: {
@@ -2428,10 +2763,15 @@ export class SharedLog<
2428
2763
  this._pendingIHave = new Map();
2429
2764
  this.latestReplicationInfoMessage = new Map();
2430
2765
  this._replicationInfoBlockedPeers = new Set();
2431
- this._replicationInfoRequestByPeer = new Map();
2432
- this._replicationInfoApplyQueueByPeer = new Map();
2433
- this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2434
- this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2766
+ this._replicationInfoRequestByPeer = new Map();
2767
+ this._replicationInfoApplyQueueByPeer = new Map();
2768
+ this._repairRetryTimers = new Set();
2769
+ this._recentRepairDispatch = new Map();
2770
+ this._repairSweepRunning = false;
2771
+ this._repairSweepForceFreshPending = false;
2772
+ this._repairSweepAddedPeersPending = new Set();
2773
+ this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2774
+ this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2435
2775
 
2436
2776
  this.uniqueReplicators = new Set();
2437
2777
  this._replicatorJoinEmitted = new Set();
@@ -2441,6 +2781,11 @@ export class SharedLog<
2441
2781
  this.oldestOpenTime = this.openTime;
2442
2782
  this.distributionDebounceTime =
2443
2783
  options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
2784
+ this.repairSweepTargetBufferSize = toPositiveInteger(
2785
+ options?.sync?.repairSweepTargetBufferSize,
2786
+ REPAIR_SWEEP_TARGET_BUFFER_SIZE,
2787
+ "sync.repairSweepTargetBufferSize",
2788
+ );
2444
2789
 
2445
2790
  this.timeUntilRoleMaturity =
2446
2791
  options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
@@ -3197,6 +3542,14 @@ export class SharedLog<
3197
3542
  "unsubscribe",
3198
3543
  this._onUnsubscriptionFn,
3199
3544
  );
3545
+ for (const timer of this._repairRetryTimers) {
3546
+ clearTimeout(timer);
3547
+ }
3548
+ this._repairRetryTimers.clear();
3549
+ this._recentRepairDispatch.clear();
3550
+ this._repairSweepRunning = false;
3551
+ this._repairSweepForceFreshPending = false;
3552
+ this._repairSweepAddedPeersPending.clear();
3200
3553
 
3201
3554
  for (const [_k, v] of this._pendingDeletes) {
3202
3555
  v.clear();
@@ -3390,7 +3743,6 @@ export class SharedLog<
3390
3743
  if (filteredHeads.length === 0) {
3391
3744
  return;
3392
3745
  }
3393
-
3394
3746
  const groupedByGid = await groupByGid(filteredHeads);
3395
3747
  const promises: Promise<void>[] = [];
3396
3748
 
@@ -4382,6 +4734,7 @@ export class SharedLog<
4382
4734
  const timeout = options.timeout ?? this.waitForReplicatorTimeout;
4383
4735
 
4384
4736
  return new Promise((resolve, reject) => {
4737
+ let settled = false;
4385
4738
  const removeListeners = () => {
4386
4739
  this.events.removeEventListener("replication:change", roleListener);
4387
4740
  this.events.removeEventListener("replicator:mature", roleListener); // TODO replication:change event ?
@@ -4390,15 +4743,26 @@ export class SharedLog<
4390
4743
  abortListener,
4391
4744
  );
4392
4745
  };
4393
- const abortListener = () => {
4746
+ const settleResolve = (value: Map<string, { intersecting: boolean }> | false) => {
4747
+ if (settled) return;
4748
+ settled = true;
4749
+ removeListeners();
4750
+ clearTimeout(timer);
4751
+ resolve(value);
4752
+ };
4753
+ const settleReject = (error: unknown) => {
4754
+ if (settled) return;
4755
+ settled = true;
4394
4756
  removeListeners();
4395
4757
  clearTimeout(timer);
4396
- resolve(false);
4758
+ reject(error);
4759
+ };
4760
+ const abortListener = () => {
4761
+ settleResolve(false);
4397
4762
  };
4398
4763
 
4399
4764
  const timer = setTimeout(async () => {
4400
- removeListeners();
4401
- resolve(false);
4765
+ settleResolve(false);
4402
4766
  }, timeout);
4403
4767
 
4404
4768
  const check = async () => {
@@ -4422,19 +4786,22 @@ export class SharedLog<
4422
4786
  }
4423
4787
  options?.onLeader && leaderKeys.forEach(options.onLeader);
4424
4788
 
4425
- removeListeners();
4426
- clearTimeout(timer);
4427
- resolve(leaders);
4789
+ settleResolve(leaders);
4790
+ };
4791
+ const runCheck = () => {
4792
+ void check().catch((error) => {
4793
+ settleReject(error);
4794
+ });
4428
4795
  };
4429
4796
 
4430
4797
  const roleListener = () => {
4431
- check();
4798
+ runCheck();
4432
4799
  };
4433
4800
 
4434
4801
  this.events.addEventListener("replication:change", roleListener); // TODO replication:change event ?
4435
4802
  this.events.addEventListener("replicator:mature", roleListener); // TODO replication:change event ?
4436
4803
  this._closeController.signal.addEventListener("abort", abortListener);
4437
- check();
4804
+ runCheck();
4438
4805
  });
4439
4806
  }
4440
4807
 
@@ -4649,8 +5016,8 @@ export class SharedLog<
4649
5016
  const selfHash = this.node.identity.publicKey.hashcode();
4650
5017
 
4651
5018
  // Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
4652
- // Falling back to live pubsub subscribers can include non-replicators and can
4653
- // break delivery/availability when writers are not directly connected.
5019
+ // If it is still warming up (for example, only contains self), supplement with
5020
+ // current subscribers until we have enough candidates for this decision.
4654
5021
  let peerFilter: Set<string> | undefined = undefined;
4655
5022
  const selfReplicating = await this.isReplicating();
4656
5023
  if (this.uniqueReplicators.size > 0) {
@@ -4660,6 +5027,22 @@ export class SharedLog<
4660
5027
  } else {
4661
5028
  peerFilter.delete(selfHash);
4662
5029
  }
5030
+
5031
+ try {
5032
+ const subscribers = await this._getTopicSubscribers(this.topic);
5033
+ if (subscribers && subscribers.length > 0) {
5034
+ for (const subscriber of subscribers) {
5035
+ peerFilter.add(subscriber.hashcode());
5036
+ }
5037
+ if (selfReplicating) {
5038
+ peerFilter.add(selfHash);
5039
+ } else {
5040
+ peerFilter.delete(selfHash);
5041
+ }
5042
+ }
5043
+ } catch {
5044
+ // Best-effort only; keep current peerFilter.
5045
+ }
4663
5046
  } else {
4664
5047
  try {
4665
5048
  const subscribers =
@@ -4810,9 +5193,20 @@ export class SharedLog<
4810
5193
  }
4811
5194
 
4812
5195
  if (!subscribed) {
5196
+ const wasReplicator = this.uniqueReplicators.has(peerHash);
5197
+ try {
5198
+ // Unsubscribe can race with the peer's final replication reset message.
5199
+ // Proactively evict its ranges so leader selection doesn't keep stale owners.
5200
+ await this.removeReplicator(publicKey, { noEvent: true });
5201
+ } catch (error) {
5202
+ if (!isNotStartedError(error as Error)) {
5203
+ throw error;
5204
+ }
5205
+ }
5206
+
4813
5207
  // Emit replicator:leave at most once per (join -> leave) transition, even if we
4814
5208
  // concurrently process unsubscribe + replication reset messages for the same peer.
4815
- const stoppedTransition = this.uniqueReplicators.delete(peerHash);
5209
+ const stoppedTransition = wasReplicator;
4816
5210
  this._replicatorJoinEmitted.delete(peerHash);
4817
5211
 
4818
5212
  this.cancelReplicationInfoRequests(peerHash);
@@ -5302,9 +5696,9 @@ export class SharedLog<
5302
5696
  * that we potentially need to share with other peers
5303
5697
  */
5304
5698
 
5305
- if (this.closed) {
5306
- return;
5307
- }
5699
+ if (this.closed) {
5700
+ return;
5701
+ }
5308
5702
 
5309
5703
  await this.log.trim();
5310
5704
 
@@ -5312,23 +5706,94 @@ export class SharedLog<
5312
5706
  ? (changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>[])
5313
5707
  : [changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>];
5314
5708
  const changes = batchedChanges.flat();
5709
+ const selfHash = this.node.identity.publicKey.hashcode();
5315
5710
  // On removed ranges (peer leaves / shrink), gid-level history can hide
5316
5711
  // per-entry gaps. Force a fresh delivery pass for reassigned entries.
5317
- const forceFreshDelivery = changes.some((change) => change.type === "removed");
5712
+ const forceFreshDelivery = changes.some(
5713
+ (change) => change.type === "removed" && change.range.hash !== selfHash,
5714
+ );
5318
5715
  const gidPeersHistorySnapshot = new Map<string, Set<string> | undefined>();
5716
+ const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
5717
+ for (const [target, hashes] of this._recentRepairDispatch) {
5718
+ for (const [hash, ts] of hashes) {
5719
+ if (ts <= dedupeCutoff) {
5720
+ hashes.delete(hash);
5721
+ }
5722
+ }
5723
+ if (hashes.size === 0) {
5724
+ this._recentRepairDispatch.delete(target);
5725
+ }
5726
+ }
5319
5727
 
5320
5728
  const changed = false;
5729
+ const replacedPeers = new Set<string>();
5730
+ for (const change of changes) {
5731
+ if (change.type === "replaced" && change.range.hash !== selfHash) {
5732
+ replacedPeers.add(change.range.hash);
5733
+ }
5734
+ }
5735
+ const addedPeers = new Set<string>();
5736
+ for (const change of changes) {
5737
+ if (change.type === "added" || change.type === "replaced") {
5738
+ const hash = change.range.hash;
5739
+ if (hash !== selfHash) {
5740
+ // Range updates can reassign entries to an existing peer shortly after it
5741
+ // already received a subset. Avoid suppressing legitimate follow-up repair.
5742
+ this._recentRepairDispatch.delete(hash);
5743
+ }
5744
+ }
5745
+ if (change.type === "added") {
5746
+ const hash = change.range.hash;
5747
+ if (hash !== selfHash && !replacedPeers.has(hash)) {
5748
+ addedPeers.add(hash);
5749
+ }
5750
+ }
5751
+ }
5321
5752
 
5322
5753
  try {
5323
5754
  const uncheckedDeliver: Map<
5324
5755
  string,
5325
5756
  Map<string, EntryReplicated<any>>
5326
5757
  > = new Map();
5758
+ const flushUncheckedDeliverTarget = (target: string) => {
5759
+ const entries = uncheckedDeliver.get(target);
5760
+ if (!entries || entries.size === 0) {
5761
+ return;
5762
+ }
5763
+ const isJoinWarmupTarget = addedPeers.has(target);
5764
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
5765
+ this.dispatchMaybeMissingEntries(target, entries, {
5766
+ bypassRecentDedupe,
5767
+ retryScheduleMs: isJoinWarmupTarget
5768
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
5769
+ : undefined,
5770
+ forceFreshDelivery,
5771
+ });
5772
+ uncheckedDeliver.delete(target);
5773
+ };
5774
+ const queueUncheckedDeliver = (
5775
+ target: string,
5776
+ entry: EntryReplicated<any>,
5777
+ ) => {
5778
+ let set = uncheckedDeliver.get(target);
5779
+ if (!set) {
5780
+ set = new Map();
5781
+ uncheckedDeliver.set(target, set);
5782
+ }
5783
+ if (set.has(entry.hash)) {
5784
+ return;
5785
+ }
5786
+ set.set(entry.hash, entry);
5787
+ if (set.size >= this.repairSweepTargetBufferSize) {
5788
+ flushUncheckedDeliverTarget(target);
5789
+ }
5790
+ };
5327
5791
 
5328
5792
  for await (const entryReplicated of toRebalance<R>(
5329
5793
  changes,
5330
5794
  this.entryCoordinatesIndex,
5331
5795
  this.recentlyRebalanced,
5796
+ { forceFresh: forceFreshDelivery },
5332
5797
  )) {
5333
5798
  if (this.closed) {
5334
5799
  break;
@@ -5356,24 +5821,16 @@ export class SharedLog<
5356
5821
  },
5357
5822
  );
5358
5823
 
5359
- for (const [currentPeer] of currentPeers) {
5360
- if (currentPeer === this.node.identity.publicKey.hashcode()) {
5361
- isLeader = true;
5362
- continue;
5363
- }
5364
-
5365
- if (!oldPeersSet?.has(currentPeer)) {
5366
- let set = uncheckedDeliver.get(currentPeer);
5367
- if (!set) {
5368
- set = new Map();
5369
- uncheckedDeliver.set(currentPeer, set);
5824
+ for (const [currentPeer] of currentPeers) {
5825
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
5826
+ isLeader = true;
5827
+ continue;
5370
5828
  }
5371
5829
 
5372
- if (!set.has(entryReplicated.hash)) {
5373
- set.set(entryReplicated.hash, entryReplicated);
5830
+ if (!oldPeersSet?.has(currentPeer)) {
5831
+ queueUncheckedDeliver(currentPeer, entryReplicated);
5374
5832
  }
5375
5833
  }
5376
- }
5377
5834
 
5378
5835
  if (oldPeersSet) {
5379
5836
  for (const oldPeer of oldPeersSet) {
@@ -5404,11 +5861,15 @@ export class SharedLog<
5404
5861
  this.removePruneRequestSent(entryReplicated.hash);
5405
5862
  }
5406
5863
  }
5407
- for (const [target, entries] of uncheckedDeliver) {
5408
- this.syncronizer.onMaybeMissingEntries({
5409
- entries,
5410
- targets: [target],
5411
- });
5864
+
5865
+ if (forceFreshDelivery || addedPeers.size > 0) {
5866
+ // Schedule a coalesced background sweep for churn/join windows instead of
5867
+ // scanning the whole index synchronously on each replication change.
5868
+ this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5869
+ }
5870
+
5871
+ for (const target of [...uncheckedDeliver.keys()]) {
5872
+ flushUncheckedDeliverTarget(target);
5412
5873
  }
5413
5874
 
5414
5875
  return changed;
@@ -5422,51 +5883,52 @@ export class SharedLog<
5422
5883
  }
5423
5884
  }
5424
5885
 
5425
- async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
5426
- logger.trace(
5427
- `Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(
5428
- evt.detail.topics.map((x) => x),
5429
- )} '`,
5430
- );
5431
- if (!evt.detail.topics.includes(this.topic)) {
5432
- return;
5433
- }
5886
+ async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
5887
+ logger.trace(
5888
+ `Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(
5889
+ evt.detail.topics.map((x) => x),
5890
+ )} '`,
5891
+ );
5892
+ if (!evt.detail.topics.includes(this.topic)) {
5893
+ return;
5894
+ }
5434
5895
 
5435
- const fromHash = evt.detail.from.hashcode();
5436
- this._replicationInfoBlockedPeers.add(fromHash);
5896
+ const fromHash = evt.detail.from.hashcode();
5897
+ this._replicationInfoBlockedPeers.add(fromHash);
5898
+ this._recentRepairDispatch.delete(fromHash);
5899
+
5900
+ // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
5901
+ // prevents late/out-of-order replication-info messages from re-introducing
5902
+ // stale segments for a peer that has already left the topic.
5903
+ const now = BigInt(+new Date());
5904
+ const prev = this.latestReplicationInfoMessage.get(fromHash);
5905
+ if (!prev || prev < now) {
5906
+ this.latestReplicationInfoMessage.set(fromHash, now);
5907
+ }
5437
5908
 
5438
- // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
5439
- // prevents late/out-of-order replication-info messages from re-introducing
5440
- // stale segments for a peer that has already left the topic.
5441
- const now = BigInt(+new Date());
5442
- const prev = this.latestReplicationInfoMessage.get(fromHash);
5443
- if (!prev || prev < now) {
5444
- this.latestReplicationInfoMessage.set(fromHash, now);
5445
- }
5909
+ return this.handleSubscriptionChange(
5910
+ evt.detail.from,
5911
+ evt.detail.topics,
5912
+ false,
5913
+ );
5914
+ }
5446
5915
 
5447
- return this.handleSubscriptionChange(
5448
- evt.detail.from,
5449
- evt.detail.topics,
5450
- false,
5451
- );
5916
+ async _onSubscription(evt: CustomEvent<SubscriptionEvent>) {
5917
+ logger.trace(
5918
+ `New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(
5919
+ evt.detail.topics.map((x) => x),
5920
+ )}'`,
5921
+ );
5922
+ if (!evt.detail.topics.includes(this.topic)) {
5923
+ return;
5452
5924
  }
5453
5925
 
5454
- async _onSubscription(evt: CustomEvent<SubscriptionEvent>) {
5455
- logger.trace(
5456
- `New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(
5457
- evt.detail.topics.map((x) => x),
5458
- )}'`,
5459
- );
5460
- if (!evt.detail.topics.includes(this.topic)) {
5461
- return;
5462
- }
5463
-
5464
- this.remoteBlocks.onReachable(evt.detail.from);
5465
- this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
5926
+ this.remoteBlocks.onReachable(evt.detail.from);
5927
+ this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
5466
5928
 
5467
- return this.handleSubscriptionChange(
5468
- evt.detail.from,
5469
- evt.detail.topics,
5929
+ await this.handleSubscriptionChange(
5930
+ evt.detail.from,
5931
+ evt.detail.topics,
5470
5932
  true,
5471
5933
  );
5472
5934
  }