@peerbit/shared-log 12.3.5-1929680 → 12.3.5-42e98ce

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -53,6 +53,7 @@ import {
53
53
  DataMessage,
54
54
  MessageHeader,
55
55
  NotStartedError,
56
+ type RouteHint,
56
57
  SilentDelivery,
57
58
  } from "@peerbit/stream-interface";
58
59
  import {
@@ -440,6 +441,25 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
440
441
  const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
441
442
 
442
443
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
444
+ const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
445
+ const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
446
+ const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
447
+ const FORCE_FRESH_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
448
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000];
449
+
450
/**
 * Normalizes an optional numeric option into an integer that is at least 1.
 *
 * - `null`/`undefined` yields `fallback` unchanged (the fallback itself is not validated).
 * - Non-finite values (NaN, ±Infinity) and values `<= 0` are rejected.
 * - Any other value is rounded down, with fractions in (0, 1) clamped up to 1.
 *
 * @param value user supplied option value, if any
 * @param fallback value returned when `value` is absent
 * @param label option name used as the prefix of the error message
 * @returns the normalized integer, or `fallback`
 * @throws Error when `value` is present but not a finite number greater than zero
 */
const toPositiveInteger = (
	value: number | undefined,
	fallback: number,
	label: string,
) => {
	if (value == null) {
		return fallback;
	}

	const isUsable = Number.isFinite(value) && value > 0;
	if (!isUsable) {
		throw new Error(`${label} must be a positive number`);
	}

	// Fractions below 1 floor to 0, so clamp the result back up to 1.
	const whole = Math.floor(value);
	return whole < 1 ? 1 : whole;
};
443
463
 
444
464
  const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS: Omit<
445
465
  FanoutTreeChannelOptions,
@@ -474,8 +494,11 @@ export type Args<
474
494
  : "u32",
475
495
  > = LogProperties<T> & LogEvents<T> & SharedLogOptions<T, D, R>;
476
496
 
497
+ export type DeliveryReliability = "ack" | "best-effort";
498
+
477
499
  export type DeliveryOptions = {
478
- settle?: true | { min: number };
500
+ reliability?: DeliveryReliability;
501
+ minAcks?: number;
479
502
  requireRecipients?: boolean;
480
503
  timeout?: number;
481
504
  signal?: AbortSignal;
@@ -647,6 +670,11 @@ export class SharedLog<
647
670
  private replicationChangeDebounceFn!: ReturnType<
648
671
  typeof debounceAggregationChanges<ReplicationRangeIndexable<R>>
649
672
  >;
673
+ private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
674
+ private _recentRepairDispatch!: Map<string, Map<string, number>>;
675
+ private _repairSweepRunning!: boolean;
676
+ private _repairSweepForceFreshPending!: boolean;
677
+ private _repairSweepAddedPeersPending!: Set<string>;
650
678
 
651
679
  // regular distribution checks
652
680
  private distributeQueue?: PQueue;
@@ -663,6 +691,7 @@ export class SharedLog<
663
691
  waitForReplicatorRequestMaxAttempts?: number;
664
692
  waitForPruneDelay!: number;
665
693
  distributionDebounceTime!: number;
694
+ repairSweepTargetBufferSize!: number;
666
695
 
667
696
  replicationController!: PIDReplicationController;
668
697
  history!: { usedMemory: number; factor: number }[];
@@ -873,32 +902,34 @@ export class SharedLog<
873
902
  deliveryArg: false | true | DeliveryOptions | undefined,
874
903
  ): {
875
904
  delivery?: DeliveryOptions;
905
+ reliability: DeliveryReliability;
876
906
  requireRecipients: boolean;
877
- settleMin?: number;
907
+ minAcks?: number;
878
908
  wrap?: (promise: Promise<void>) => Promise<void>;
879
909
  } {
880
910
  const delivery: DeliveryOptions | undefined =
881
911
  deliveryArg === undefined || deliveryArg === false
882
912
  ? undefined
883
913
  : deliveryArg === true
884
- ? {}
914
+ ? { reliability: "ack" }
885
915
  : deliveryArg;
886
916
  if (!delivery) {
887
917
  return {
888
918
  delivery: undefined,
919
+ reliability: "best-effort",
889
920
  requireRecipients: false,
890
- settleMin: undefined,
921
+ minAcks: undefined,
891
922
  wrap: undefined,
892
923
  };
893
924
  }
894
925
 
895
- const deliverySettle = delivery.settle ?? true;
926
+ const reliability: DeliveryReliability = delivery.reliability ?? "ack";
896
927
  const deliveryTimeout = delivery.timeout;
897
928
  const deliverySignal = delivery.signal;
898
929
  const requireRecipients = delivery.requireRecipients === true;
899
- const settleMin =
900
- typeof deliverySettle === "object" && Number.isFinite(deliverySettle.min)
901
- ? Math.max(0, Math.floor(deliverySettle.min))
930
+ const minAcks =
931
+ delivery.minAcks != null && Number.isFinite(delivery.minAcks)
932
+ ? Math.max(0, Math.floor(delivery.minAcks))
902
933
  : undefined;
903
934
 
904
935
  const wrap =
@@ -967,12 +998,107 @@ export class SharedLog<
967
998
 
968
999
  return {
969
1000
  delivery,
1001
+ reliability,
970
1002
  requireRecipients,
971
- settleMin,
1003
+ minAcks,
972
1004
  wrap,
973
1005
  };
974
1006
  }
975
1007
 
1008
/**
 * Asks the pubsub service for route hints towards `targetHash` on this log's
 * topic and returns the non-expired ones, best candidate first.
 *
 * Ordering:
 *  1. "directstream-ack" hints before every other kind,
 *  2. lower cost first (`distance` for directstream-ack hints, otherwise
 *     `route.length - 1` floored at 0),
 *  3. most recently updated first.
 *
 * Hints are only an optimization for choosing a delivery path, so a failing
 * lookup is traced and treated as "no hints" instead of rejecting; callers can
 * then still fall back to their hint-less delivery paths.
 *
 * @param targetHash hash of the peer to reach
 * @returns a freshly allocated, sorted array; empty when the pubsub service
 * does not expose `getUnifiedRouteHints`, yields a non-array, or the lookup fails
 */
private async _getSortedRouteHints(
	targetHash: string,
): Promise<RouteHint[]> {
	// `getUnifiedRouteHints` is an optional capability, hence the untyped access.
	const pubsub: any = this.node.services.pubsub as any;

	let maybeHints: unknown;
	try {
		maybeHints = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
	} catch (error: any) {
		logger.trace(
			`Failed to resolve route hints for '${targetHash}': ${error?.message ?? error}`,
		);
		return [];
	}

	const hints: RouteHint[] = Array.isArray(maybeHints) ? maybeHints : [];
	const now = Date.now();

	const rank = (hint: RouteHint): number =>
		hint.kind === "directstream-ack" ? 0 : 1;

	// The hints are not validated at runtime; a missing or non-finite cost would
	// make the comparator return NaN, so such hints are ranked as most expensive.
	const cost = (hint: RouteHint): number => {
		const raw: unknown =
			hint.kind === "directstream-ack"
				? hint.distance
				: Math.max(0, (hint.route?.length ?? 1) - 1);
		return typeof raw === "number" && Number.isFinite(raw)
			? raw
			: Number.MAX_SAFE_INTEGER;
	};

	return hints
		.filter((hint) => hint.expiresAt == null || hint.expiresAt > now)
		.sort((a, b) => {
			const rankDelta = rank(a) - rank(b);
			if (rankDelta !== 0) {
				return rankDelta;
			}

			const costA = cost(a);
			const costB = cost(b);
			if (costA !== costB) {
				return costA - costB;
			}

			return (b.updatedAt ?? 0) - (a.updatedAt ?? 0);
		});
}
1039
+
1040
/**
 * Delivers `message` to a single peer with acknowledgement, trying delivery
 * paths in this order and stopping at the first one that succeeds:
 *
 *  1. RPC send with `AcknowledgeDelivery`, only when a "directstream-ack"
 *     route hint exists for the peer,
 *  2. fanout `unicastAck` along the route of the first "fanout-token" hint,
 *  3. fanout `unicastToAck` addressed to the peer,
 *  4. RPC send with `AcknowledgeDelivery` as the last resort.
 *
 * Failures of steps 1-3 are intentionally swallowed so the next path is tried;
 * only a failure of step 4 rejects the returned promise.
 *
 * @param properties.peer hash of the recipient
 * @param properties.message message used for the RPC paths
 * @param properties.payload bytes used for the fanout paths
 * @param properties.fanoutUnicastOptions options forwarded to the fanout calls
 */
private async _sendAckWithUnifiedHints(properties: {
	peer: string;
	message: ExchangeHeadsMessage<any>;
	payload: Uint8Array;
	fanoutUnicastOptions?: { timeoutMs?: number; signal?: AbortSignal };
}): Promise<void> {
	const { peer, message, payload, fanoutUnicastOptions } = properties;

	const sendViaRpc = () =>
		this.rpc.send(message, {
			mode: new AcknowledgeDelivery({
				redundancy: 1,
				to: [peer],
			}),
		});

	// Runs one delivery attempt; reports failure instead of throwing so the
	// caller can move on to the next path.
	const delivered = async (attempt: () => unknown): Promise<boolean> => {
		try {
			await attempt();
			return true;
		} catch {
			return false;
		}
	};

	const hints = await this._getSortedRouteHints(peer);
	const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
	const fanoutHint = hints.find(
		(hint): hint is Extract<RouteHint, { kind: "fanout-token" }> =>
			hint.kind === "fanout-token",
	);

	if (hasDirectHint && (await delivered(sendViaRpc))) {
		return;
	}

	// The channel is re-read before each fanout step, as it may have changed
	// while an earlier attempt was awaited.
	const tokenChannel = this._fanoutChannel;
	if (
		fanoutHint &&
		tokenChannel &&
		(await delivered(() =>
			tokenChannel.unicastAck(fanoutHint.route, payload, fanoutUnicastOptions),
		))
	) {
		return;
	}

	const unicastChannel = this._fanoutChannel;
	if (
		unicastChannel &&
		(await delivered(() =>
			unicastChannel.unicastToAck(peer, payload, fanoutUnicastOptions),
		))
	) {
		return;
	}

	// Last resort: errors from this attempt propagate to the caller.
	await sendViaRpc();
}
1101
+
976
1102
  private async _appendDeliverToReplicators(
977
1103
  entry: Entry<T>,
978
1104
  minReplicasValue: number,
@@ -981,7 +1107,7 @@ export class SharedLog<
981
1107
  isLeader: boolean,
982
1108
  deliveryArg: false | true | DeliveryOptions | undefined,
983
1109
  ) {
984
- const { delivery, requireRecipients, settleMin, wrap } =
1110
+ const { delivery, reliability, requireRecipients, minAcks, wrap } =
985
1111
  this._parseDeliveryOptions(deliveryArg);
986
1112
  const pending: Promise<void>[] = [];
987
1113
  const track = (promise: Promise<void>) => {
@@ -997,11 +1123,32 @@ export class SharedLog<
997
1123
  const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
998
1124
 
999
1125
  const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1000
- const hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1126
+ let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1127
+ const allowSubscriberFallback =
1128
+ this.syncronizer instanceof SimpleSyncronizer ||
1129
+ (this.compatibility ?? Number.MAX_VALUE) < 10;
1130
+ if (!hasRemotePeers && allowSubscriberFallback) {
1131
+ try {
1132
+ const subscribers = await this._getTopicSubscribers(this.topic);
1133
+ if (subscribers && subscribers.length > 0) {
1134
+ for (const subscriber of subscribers) {
1135
+ const hash = subscriber.hashcode();
1136
+ if (hash === selfHash) {
1137
+ continue;
1138
+ }
1139
+ set.add(hash);
1140
+ leadersForDelivery?.add(hash);
1141
+ }
1142
+ hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1143
+ }
1144
+ } catch {
1145
+ // Best-effort only; keep discovered recipients as-is.
1146
+ }
1147
+ }
1001
1148
  if (!hasRemotePeers) {
1002
1149
  if (requireRecipients) {
1003
- throw new NoPeersError(this.rpc.topic);
1004
- }
1150
+ throw new NoPeersError(this.rpc.topic);
1151
+ }
1005
1152
  continue;
1006
1153
  }
1007
1154
 
@@ -1037,8 +1184,13 @@ export class SharedLog<
1037
1184
  let silentTo: string[] | undefined;
1038
1185
  // Default delivery semantics: require enough remote ACKs to reach the requested
1039
1186
  // replication degree (local append counts as 1).
1040
- const ackLimit =
1041
- settleMin == null ? Math.max(0, minReplicasValue - 1) : settleMin;
1187
+ const defaultMinAcks = Math.max(0, minReplicasValue - 1);
1188
+ const ackLimitRaw =
1189
+ reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
1190
+ const ackLimit = Math.max(
1191
+ 0,
1192
+ Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length),
1193
+ );
1042
1194
 
1043
1195
  for (const peer of orderedRemoteRecipients) {
1044
1196
  if (ackTo.length < ackLimit) {
@@ -1061,48 +1213,11 @@ export class SharedLog<
1061
1213
  for (const peer of ackTo) {
1062
1214
  track(
1063
1215
  (async () => {
1064
- // Unified decision point:
1065
- // - If we can prove a cheap direct path (connected or routed), use it.
1066
- // - Otherwise, fall back to the fanout unicast ACK path (bounded overlay routing).
1067
- // - If that fails, fall back to pubsub/RPC routing which may flood to discover routes.
1068
- const pubsub: any = this.node.services.pubsub as any;
1069
- const canDirectFast =
1070
- Boolean(pubsub?.peers?.get?.(peer)?.isWritable) ||
1071
- Boolean(
1072
- pubsub?.routes?.isReachable?.(
1073
- pubsub?.publicKeyHash,
1074
- peer,
1075
- 0,
1076
- ),
1077
- );
1078
-
1079
- if (canDirectFast) {
1080
- await this.rpc.send(message, {
1081
- mode: new AcknowledgeDelivery({
1082
- redundancy: 1,
1083
- to: [peer],
1084
- }),
1085
- });
1086
- return;
1087
- }
1088
-
1089
- if (this._fanoutChannel) {
1090
- try {
1091
- await this._fanoutChannel.unicastToAck(
1092
- peer,
1093
- payload,
1094
- fanoutUnicastOptions,
1095
- );
1096
- return;
1097
- } catch {
1098
- // fall back below
1099
- }
1100
- }
1101
- await this.rpc.send(message, {
1102
- mode: new AcknowledgeDelivery({
1103
- redundancy: 1,
1104
- to: [peer],
1105
- }),
1216
+ await this._sendAckWithUnifiedHints({
1217
+ peer,
1218
+ message,
1219
+ payload,
1220
+ fanoutUnicastOptions,
1106
1221
  });
1107
1222
  })(),
1108
1223
  );
@@ -1723,6 +1838,14 @@ export class SharedLog<
1723
1838
  this.pendingMaturity.delete(keyHash);
1724
1839
  }
1725
1840
 
1841
+ // Keep local sync/prune state consistent even when a peer disappears
1842
+ // through replication-info updates without a topic unsubscribe event.
1843
+ this.removePeerFromGidPeerHistory(keyHash);
1844
+ this._recentRepairDispatch.delete(keyHash);
1845
+ if (!isMe) {
1846
+ this.syncronizer.onPeerDisconnected(keyHash);
1847
+ }
1848
+
1726
1849
  if (!isMe) {
1727
1850
  this.rebalanceParticipationDebounced?.call();
1728
1851
  }
@@ -2207,6 +2330,201 @@ export class SharedLog<
2207
2330
  return set;
2208
2331
  }
2209
2332
 
2333
/**
 * Hands `entries` to the syncronizer as "maybe missing" for a single target
 * peer, optionally repeating the hand-off on a retry schedule.
 *
 * Recent-dispatch dedupe: each (target, entry hash) pair is remembered for
 * `RECENT_REPAIR_DISPATCH_TTL_MS`. Unless `bypassRecentDedupe` is set, entries
 * dispatched to the same target within that window are skipped. Dispatched
 * entries are (re)stamped with the current time in both modes.
 *
 * Retry schedule: `retryScheduleMs` when non-empty, else
 * `FORCE_FRESH_RETRY_SCHEDULE_MS` when `forceFreshDelivery` is set, else a
 * single immediate attempt. A delay of 0 runs immediately; other delays are
 * timers tracked in `_repairRetryTimers` that do nothing once the log is closed.
 *
 * Errors from the syncronizer are logged and never propagate, including
 * synchronous throws (which would otherwise escape from a timer callback).
 *
 * @param target hash of the peer that may be missing the entries
 * @param entries candidate entries keyed by entry hash (not mutated)
 * @param options dedupe / retry behaviour, see above
 */
private dispatchMaybeMissingEntries(
	target: string,
	entries: Map<string, EntryReplicated<any>>,
	options?: {
		bypassRecentDedupe?: boolean;
		retryScheduleMs?: number[];
		forceFreshDelivery?: boolean;
	},
) {
	if (entries.size === 0) {
		return;
	}

	const now = Date.now();
	let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
	if (!recentlyDispatchedByHash) {
		recentlyDispatchedByHash = new Map();
		this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
	}

	// Drop expired dedupe stamps for this target before consulting them.
	for (const [hash, ts] of recentlyDispatchedByHash) {
		if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
			recentlyDispatchedByHash.delete(hash);
		}
	}

	// Work on a private copy so later mutation of `entries` by the caller
	// cannot affect delayed retries.
	const filteredEntries =
		options?.bypassRecentDedupe === true
			? new Map(entries)
			: new Map<string, EntryReplicated<any>>();
	if (options?.bypassRecentDedupe !== true) {
		for (const [hash, entry] of entries) {
			const prev = recentlyDispatchedByHash.get(hash);
			if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
				continue;
			}
			recentlyDispatchedByHash.set(hash, now);
			filteredEntries.set(hash, entry);
		}
	} else {
		for (const hash of entries.keys()) {
			recentlyDispatchedByHash.set(hash, now);
		}
	}
	if (filteredEntries.size === 0) {
		return;
	}

	// An async function turns a synchronous throw from the syncronizer into a
	// caught error as well, which `Promise.resolve(fn()).catch()` would not.
	const run = async (): Promise<void> => {
		try {
			await this.syncronizer.onMaybeMissingEntries({
				entries: filteredEntries,
				targets: [target],
			});
		} catch (error: any) {
			logger.error(error);
		}
	};

	const retrySchedule =
		options?.retryScheduleMs && options.retryScheduleMs.length > 0
			? options.retryScheduleMs
			: options?.forceFreshDelivery
				? FORCE_FRESH_RETRY_SCHEDULE_MS
				: [0];

	for (const delayMs of retrySchedule) {
		if (delayMs === 0) {
			void run();
			continue;
		}
		const timer = setTimeout(() => {
			this._repairRetryTimers.delete(timer);
			if (this.closed) {
				return;
			}
			void run();
		}, delayMs);
		// Do not keep the process alive just for a repair retry (Node only).
		timer.unref?.();
		this._repairRetryTimers.add(timer);
	}
}
2411
+
2412
/**
 * Records a repair-sweep request and starts the sweep when none is running.
 *
 * Requests are coalesced: the force-fresh flag is sticky and the added peers
 * are merged into the pending set, both to be picked up by `runRepairSweep`.
 * The request is still recorded when a sweep is already running or the log is
 * closed; in those cases no new sweep is started here.
 *
 * @param options.forceFreshDelivery request a force-fresh pass
 * @param options.addedPeers peer hashes to merge into the pending set
 */
private scheduleRepairSweep(options: {
	forceFreshDelivery: boolean;
	addedPeers: Set<string>;
}) {
	const { forceFreshDelivery, addedPeers } = options;

	if (forceFreshDelivery) {
		this._repairSweepForceFreshPending = true;
	}
	addedPeers.forEach((peer) => {
		this._repairSweepAddedPeersPending.add(peer);
	});

	if (this._repairSweepRunning || this.closed) {
		return;
	}
	this._repairSweepRunning = true;
	void this.runRepairSweep();
}
2427
+
2428
/**
 * Background repair sweep over the entry-coordinates index.
 *
 * Each pass takes (and clears) the pending force-fresh flag and pending added
 * peers, then iterates the whole index in batches of
 * `REPAIR_SWEEP_ENTRY_BATCH_SIZE`. For every entry the current leaders are
 * computed with `roleAge: 0`, and the entry is queued for
 *  - every current leader except this node, when the pass is force-fresh,
 *  - every added peer that is a current leader.
 * Queued entries are buffered per target and flushed through
 * `dispatchMaybeMissingEntries` when a buffer reaches
 * `repairSweepTargetBufferSize`, and once more at the end of the pass.
 *
 * Passes repeat until nothing is pending or the log is closed. A
 * not-started error is ignored and any other error is logged; either way the
 * failing pass is abandoned. The running flag is always reset, and a new sweep
 * is started if requests arrived in the meantime.
 */
private async runRepairSweep() {
	try {
		while (!this.closed) {
			const forceFreshDelivery = this._repairSweepForceFreshPending;
			const addedPeers = new Set(this._repairSweepAddedPeersPending);
			this._repairSweepForceFreshPending = false;
			this._repairSweepAddedPeersPending.clear();

			if (!forceFreshDelivery && addedPeers.size === 0) {
				return;
			}

			// Loop invariant: computed once per pass instead of once per leader.
			const selfHash = this.node.identity.publicKey.hashcode();

			const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
			const flushTarget = (target: string) => {
				const entries = pendingByTarget.get(target);
				if (!entries || entries.size === 0) {
					return;
				}
				const isJoinWarmupTarget = addedPeers.has(target);
				const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
				this.dispatchMaybeMissingEntries(target, entries, {
					bypassRecentDedupe,
					retryScheduleMs: isJoinWarmupTarget
						? JOIN_WARMUP_RETRY_SCHEDULE_MS
						: undefined,
					forceFreshDelivery,
				});
				pendingByTarget.delete(target);
			};
			const queueEntryForTarget = (
				target: string,
				entry: EntryReplicated<any>,
			) => {
				let set = pendingByTarget.get(target);
				if (!set) {
					set = new Map();
					pendingByTarget.set(target, set);
				}
				if (set.has(entry.hash)) {
					return;
				}
				set.set(entry.hash, entry);
				if (set.size >= this.repairSweepTargetBufferSize) {
					flushTarget(target);
				}
			};

			const iterator = this.entryCoordinatesIndex.iterate({});
			try {
				while (!this.closed && !iterator.done()) {
					const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
					for (const entry of entries) {
						const entryReplicated = entry.value;
						const currentPeers = await this.findLeaders(
							entryReplicated.coordinates,
							entryReplicated,
							{ roleAge: 0 },
						);

						// Collect each target once per entry. Queueing the same
						// (target, entry) pair twice would dispatch it twice whenever
						// the first queueing happened to trigger a buffer flush.
						const targets = new Set<string>();
						if (forceFreshDelivery) {
							for (const [currentPeer] of currentPeers) {
								if (currentPeer !== selfHash) {
									targets.add(currentPeer);
								}
							}
						}
						for (const peer of addedPeers) {
							if (currentPeers.has(peer)) {
								targets.add(peer);
							}
						}
						for (const target of targets) {
							queueEntryForTarget(target, entryReplicated);
						}
					}
				}
			} finally {
				await iterator.close();
			}

			// Flush whatever did not fill a whole buffer. Iterate over a copy of
			// the keys because flushing deletes from the map.
			for (const target of [...pendingByTarget.keys()]) {
				flushTarget(target);
			}
		}
	} catch (error: any) {
		if (!isNotStartedError(error)) {
			logger.error(`Repair sweep failed: ${error?.message ?? error}`);
		}
	} finally {
		this._repairSweepRunning = false;
		// Requests that arrived while this sweep was ending (or failing) would
		// otherwise wait for the next external trigger.
		if (
			!this.closed &&
			(this._repairSweepForceFreshPending ||
				this._repairSweepAddedPeersPending.size > 0)
		) {
			this._repairSweepRunning = true;
			void this.runRepairSweep();
		}
	}
}
2527
+
2210
2528
  private async pruneDebouncedFnAddIfNotKeeping(args: {
2211
2529
  key: string;
2212
2530
  value: {
@@ -2428,10 +2746,15 @@ export class SharedLog<
2428
2746
  this._pendingIHave = new Map();
2429
2747
  this.latestReplicationInfoMessage = new Map();
2430
2748
  this._replicationInfoBlockedPeers = new Set();
2431
- this._replicationInfoRequestByPeer = new Map();
2432
- this._replicationInfoApplyQueueByPeer = new Map();
2433
- this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2434
- this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2749
+ this._replicationInfoRequestByPeer = new Map();
2750
+ this._replicationInfoApplyQueueByPeer = new Map();
2751
+ this._repairRetryTimers = new Set();
2752
+ this._recentRepairDispatch = new Map();
2753
+ this._repairSweepRunning = false;
2754
+ this._repairSweepForceFreshPending = false;
2755
+ this._repairSweepAddedPeersPending = new Set();
2756
+ this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2757
+ this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2435
2758
 
2436
2759
  this.uniqueReplicators = new Set();
2437
2760
  this._replicatorJoinEmitted = new Set();
@@ -2441,6 +2764,11 @@ export class SharedLog<
2441
2764
  this.oldestOpenTime = this.openTime;
2442
2765
  this.distributionDebounceTime =
2443
2766
  options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
2767
+ this.repairSweepTargetBufferSize = toPositiveInteger(
2768
+ options?.sync?.repairSweepTargetBufferSize,
2769
+ REPAIR_SWEEP_TARGET_BUFFER_SIZE,
2770
+ "sync.repairSweepTargetBufferSize",
2771
+ );
2444
2772
 
2445
2773
  this.timeUntilRoleMaturity =
2446
2774
  options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
@@ -3197,6 +3525,14 @@ export class SharedLog<
3197
3525
  "unsubscribe",
3198
3526
  this._onUnsubscriptionFn,
3199
3527
  );
3528
+ for (const timer of this._repairRetryTimers) {
3529
+ clearTimeout(timer);
3530
+ }
3531
+ this._repairRetryTimers.clear();
3532
+ this._recentRepairDispatch.clear();
3533
+ this._repairSweepRunning = false;
3534
+ this._repairSweepForceFreshPending = false;
3535
+ this._repairSweepAddedPeersPending.clear();
3200
3536
 
3201
3537
  for (const [_k, v] of this._pendingDeletes) {
3202
3538
  v.clear();
@@ -3390,7 +3726,6 @@ export class SharedLog<
3390
3726
  if (filteredHeads.length === 0) {
3391
3727
  return;
3392
3728
  }
3393
-
3394
3729
  const groupedByGid = await groupByGid(filteredHeads);
3395
3730
  const promises: Promise<void>[] = [];
3396
3731
 
@@ -4664,8 +4999,8 @@ export class SharedLog<
4664
4999
  const selfHash = this.node.identity.publicKey.hashcode();
4665
5000
 
4666
5001
  // Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
4667
- // Falling back to live pubsub subscribers can include non-replicators and can
4668
- // break delivery/availability when writers are not directly connected.
5002
+ // If it is still warming up (for example, only contains self), supplement with
5003
+ // current subscribers until we have enough candidates for this decision.
4669
5004
  let peerFilter: Set<string> | undefined = undefined;
4670
5005
  const selfReplicating = await this.isReplicating();
4671
5006
  if (this.uniqueReplicators.size > 0) {
@@ -4675,6 +5010,22 @@ export class SharedLog<
4675
5010
  } else {
4676
5011
  peerFilter.delete(selfHash);
4677
5012
  }
5013
+
5014
+ try {
5015
+ const subscribers = await this._getTopicSubscribers(this.topic);
5016
+ if (subscribers && subscribers.length > 0) {
5017
+ for (const subscriber of subscribers) {
5018
+ peerFilter.add(subscriber.hashcode());
5019
+ }
5020
+ if (selfReplicating) {
5021
+ peerFilter.add(selfHash);
5022
+ } else {
5023
+ peerFilter.delete(selfHash);
5024
+ }
5025
+ }
5026
+ } catch {
5027
+ // Best-effort only; keep current peerFilter.
5028
+ }
4678
5029
  } else {
4679
5030
  try {
4680
5031
  const subscribers =
@@ -4825,9 +5176,20 @@ export class SharedLog<
4825
5176
  }
4826
5177
 
4827
5178
  if (!subscribed) {
5179
+ const wasReplicator = this.uniqueReplicators.has(peerHash);
5180
+ try {
5181
+ // Unsubscribe can race with the peer's final replication reset message.
5182
+ // Proactively evict its ranges so leader selection doesn't keep stale owners.
5183
+ await this.removeReplicator(publicKey, { noEvent: true });
5184
+ } catch (error) {
5185
+ if (!isNotStartedError(error as Error)) {
5186
+ throw error;
5187
+ }
5188
+ }
5189
+
4828
5190
  // Emit replicator:leave at most once per (join -> leave) transition, even if we
4829
5191
  // concurrently process unsubscribe + replication reset messages for the same peer.
4830
- const stoppedTransition = this.uniqueReplicators.delete(peerHash);
5192
+ const stoppedTransition = wasReplicator;
4831
5193
  this._replicatorJoinEmitted.delete(peerHash);
4832
5194
 
4833
5195
  this.cancelReplicationInfoRequests(peerHash);
@@ -5317,9 +5679,9 @@ export class SharedLog<
5317
5679
  * that we potentially need to share with other peers
5318
5680
  */
5319
5681
 
5320
- if (this.closed) {
5321
- return;
5322
- }
5682
+ if (this.closed) {
5683
+ return;
5684
+ }
5323
5685
 
5324
5686
  await this.log.trim();
5325
5687
 
@@ -5327,23 +5689,94 @@ export class SharedLog<
5327
5689
  ? (changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>[])
5328
5690
  : [changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>];
5329
5691
  const changes = batchedChanges.flat();
5692
+ const selfHash = this.node.identity.publicKey.hashcode();
5330
5693
  // On removed ranges (peer leaves / shrink), gid-level history can hide
5331
5694
  // per-entry gaps. Force a fresh delivery pass for reassigned entries.
5332
- const forceFreshDelivery = changes.some((change) => change.type === "removed");
5695
+ const forceFreshDelivery = changes.some(
5696
+ (change) => change.type === "removed" && change.range.hash !== selfHash,
5697
+ );
5333
5698
  const gidPeersHistorySnapshot = new Map<string, Set<string> | undefined>();
5699
+ const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
5700
+ for (const [target, hashes] of this._recentRepairDispatch) {
5701
+ for (const [hash, ts] of hashes) {
5702
+ if (ts <= dedupeCutoff) {
5703
+ hashes.delete(hash);
5704
+ }
5705
+ }
5706
+ if (hashes.size === 0) {
5707
+ this._recentRepairDispatch.delete(target);
5708
+ }
5709
+ }
5334
5710
 
5335
5711
  const changed = false;
5712
+ const replacedPeers = new Set<string>();
5713
+ for (const change of changes) {
5714
+ if (change.type === "replaced" && change.range.hash !== selfHash) {
5715
+ replacedPeers.add(change.range.hash);
5716
+ }
5717
+ }
5718
+ const addedPeers = new Set<string>();
5719
+ for (const change of changes) {
5720
+ if (change.type === "added" || change.type === "replaced") {
5721
+ const hash = change.range.hash;
5722
+ if (hash !== selfHash) {
5723
+ // Range updates can reassign entries to an existing peer shortly after it
5724
+ // already received a subset. Avoid suppressing legitimate follow-up repair.
5725
+ this._recentRepairDispatch.delete(hash);
5726
+ }
5727
+ }
5728
+ if (change.type === "added") {
5729
+ const hash = change.range.hash;
5730
+ if (hash !== selfHash && !replacedPeers.has(hash)) {
5731
+ addedPeers.add(hash);
5732
+ }
5733
+ }
5734
+ }
5336
5735
 
5337
5736
  try {
5338
5737
  const uncheckedDeliver: Map<
5339
5738
  string,
5340
5739
  Map<string, EntryReplicated<any>>
5341
5740
  > = new Map();
5741
+ const flushUncheckedDeliverTarget = (target: string) => {
5742
+ const entries = uncheckedDeliver.get(target);
5743
+ if (!entries || entries.size === 0) {
5744
+ return;
5745
+ }
5746
+ const isJoinWarmupTarget = addedPeers.has(target);
5747
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
5748
+ this.dispatchMaybeMissingEntries(target, entries, {
5749
+ bypassRecentDedupe,
5750
+ retryScheduleMs: isJoinWarmupTarget
5751
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
5752
+ : undefined,
5753
+ forceFreshDelivery,
5754
+ });
5755
+ uncheckedDeliver.delete(target);
5756
+ };
5757
+ const queueUncheckedDeliver = (
5758
+ target: string,
5759
+ entry: EntryReplicated<any>,
5760
+ ) => {
5761
+ let set = uncheckedDeliver.get(target);
5762
+ if (!set) {
5763
+ set = new Map();
5764
+ uncheckedDeliver.set(target, set);
5765
+ }
5766
+ if (set.has(entry.hash)) {
5767
+ return;
5768
+ }
5769
+ set.set(entry.hash, entry);
5770
+ if (set.size >= this.repairSweepTargetBufferSize) {
5771
+ flushUncheckedDeliverTarget(target);
5772
+ }
5773
+ };
5342
5774
 
5343
5775
  for await (const entryReplicated of toRebalance<R>(
5344
5776
  changes,
5345
5777
  this.entryCoordinatesIndex,
5346
5778
  this.recentlyRebalanced,
5779
+ { forceFresh: forceFreshDelivery },
5347
5780
  )) {
5348
5781
  if (this.closed) {
5349
5782
  break;
@@ -5371,24 +5804,16 @@ export class SharedLog<
5371
5804
  },
5372
5805
  );
5373
5806
 
5374
- for (const [currentPeer] of currentPeers) {
5375
- if (currentPeer === this.node.identity.publicKey.hashcode()) {
5376
- isLeader = true;
5377
- continue;
5378
- }
5379
-
5380
- if (!oldPeersSet?.has(currentPeer)) {
5381
- let set = uncheckedDeliver.get(currentPeer);
5382
- if (!set) {
5383
- set = new Map();
5384
- uncheckedDeliver.set(currentPeer, set);
5807
+ for (const [currentPeer] of currentPeers) {
5808
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
5809
+ isLeader = true;
5810
+ continue;
5385
5811
  }
5386
5812
 
5387
- if (!set.has(entryReplicated.hash)) {
5388
- set.set(entryReplicated.hash, entryReplicated);
5813
+ if (!oldPeersSet?.has(currentPeer)) {
5814
+ queueUncheckedDeliver(currentPeer, entryReplicated);
5389
5815
  }
5390
5816
  }
5391
- }
5392
5817
 
5393
5818
  if (oldPeersSet) {
5394
5819
  for (const oldPeer of oldPeersSet) {
@@ -5419,11 +5844,15 @@ export class SharedLog<
5419
5844
  this.removePruneRequestSent(entryReplicated.hash);
5420
5845
  }
5421
5846
  }
5422
- for (const [target, entries] of uncheckedDeliver) {
5423
- this.syncronizer.onMaybeMissingEntries({
5424
- entries,
5425
- targets: [target],
5426
- });
5847
+
5848
+ if (forceFreshDelivery || addedPeers.size > 0) {
5849
+ // Schedule a coalesced background sweep for churn/join windows instead of
5850
+ // scanning the whole index synchronously on each replication change.
5851
+ this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5852
+ }
5853
+
5854
+ for (const target of [...uncheckedDeliver.keys()]) {
5855
+ flushUncheckedDeliverTarget(target);
5427
5856
  }
5428
5857
 
5429
5858
  return changed;
@@ -5437,51 +5866,52 @@ export class SharedLog<
5437
5866
  }
5438
5867
  }
5439
5868
 
5440
- async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
5441
- logger.trace(
5442
- `Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(
5443
- evt.detail.topics.map((x) => x),
5444
- )} '`,
5445
- );
5446
- if (!evt.detail.topics.includes(this.topic)) {
5447
- return;
5448
- }
5869
+ async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
5870
+ logger.trace(
5871
+ `Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(
5872
+ evt.detail.topics.map((x) => x),
5873
+ )} '`,
5874
+ );
5875
+ if (!evt.detail.topics.includes(this.topic)) {
5876
+ return;
5877
+ }
5449
5878
 
5450
- const fromHash = evt.detail.from.hashcode();
5451
- this._replicationInfoBlockedPeers.add(fromHash);
5879
+ const fromHash = evt.detail.from.hashcode();
5880
+ this._replicationInfoBlockedPeers.add(fromHash);
5881
+ this._recentRepairDispatch.delete(fromHash);
5882
+
5883
+ // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
5884
+ // prevents late/out-of-order replication-info messages from re-introducing
5885
+ // stale segments for a peer that has already left the topic.
5886
+ const now = BigInt(+new Date());
5887
+ const prev = this.latestReplicationInfoMessage.get(fromHash);
5888
+ if (!prev || prev < now) {
5889
+ this.latestReplicationInfoMessage.set(fromHash, now);
5890
+ }
5452
5891
 
5453
- // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
5454
- // prevents late/out-of-order replication-info messages from re-introducing
5455
- // stale segments for a peer that has already left the topic.
5456
- const now = BigInt(+new Date());
5457
- const prev = this.latestReplicationInfoMessage.get(fromHash);
5458
- if (!prev || prev < now) {
5459
- this.latestReplicationInfoMessage.set(fromHash, now);
5460
- }
5892
+ return this.handleSubscriptionChange(
5893
+ evt.detail.from,
5894
+ evt.detail.topics,
5895
+ false,
5896
+ );
5897
+ }
5461
5898
 
5462
- return this.handleSubscriptionChange(
5463
- evt.detail.from,
5464
- evt.detail.topics,
5465
- false,
5466
- );
5899
+ async _onSubscription(evt: CustomEvent<SubscriptionEvent>) {
5900
+ logger.trace(
5901
+ `New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(
5902
+ evt.detail.topics.map((x) => x),
5903
+ )}'`,
5904
+ );
5905
+ if (!evt.detail.topics.includes(this.topic)) {
5906
+ return;
5467
5907
  }
5468
5908
 
5469
- async _onSubscription(evt: CustomEvent<SubscriptionEvent>) {
5470
- logger.trace(
5471
- `New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(
5472
- evt.detail.topics.map((x) => x),
5473
- )}'`,
5474
- );
5475
- if (!evt.detail.topics.includes(this.topic)) {
5476
- return;
5477
- }
5478
-
5479
- this.remoteBlocks.onReachable(evt.detail.from);
5480
- this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
5909
+ this.remoteBlocks.onReachable(evt.detail.from);
5910
+ this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
5481
5911
 
5482
- return this.handleSubscriptionChange(
5483
- evt.detail.from,
5484
- evt.detail.topics,
5912
+ await this.handleSubscriptionChange(
5913
+ evt.detail.from,
5914
+ evt.detail.topics,
5485
5915
  true,
5486
5916
  );
5487
5917
  }