@peerbit/shared-log 12.3.5-3f16953 → 12.3.5-42e98ce

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -53,6 +53,7 @@ import {
53
53
  DataMessage,
54
54
  MessageHeader,
55
55
  NotStartedError,
56
+ type RouteHint,
56
57
  SilentDelivery,
57
58
  } from "@peerbit/stream-interface";
58
59
  import {
@@ -440,6 +441,25 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
440
441
  const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
441
442
 
442
443
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
444
+ const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
445
+ const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
446
+ const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
447
+ const FORCE_FRESH_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
448
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000];
449
+
450
+ const toPositiveInteger = (
451
+ value: number | undefined,
452
+ fallback: number,
453
+ label: string,
454
+ ) => {
455
+ if (value == null) {
456
+ return fallback;
457
+ }
458
+ if (!Number.isFinite(value) || value <= 0) {
459
+ throw new Error(`${label} must be a positive number`);
460
+ }
461
+ return Math.max(1, Math.floor(value));
462
+ };
443
463
 
444
464
  const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS: Omit<
445
465
  FanoutTreeChannelOptions,
@@ -474,8 +494,11 @@ export type Args<
474
494
  : "u32",
475
495
  > = LogProperties<T> & LogEvents<T> & SharedLogOptions<T, D, R>;
476
496
 
497
+ export type DeliveryReliability = "ack" | "best-effort";
498
+
477
499
  export type DeliveryOptions = {
478
- settle?: true | { min: number };
500
+ reliability?: DeliveryReliability;
501
+ minAcks?: number;
479
502
  requireRecipients?: boolean;
480
503
  timeout?: number;
481
504
  signal?: AbortSignal;
@@ -647,6 +670,11 @@ export class SharedLog<
647
670
  private replicationChangeDebounceFn!: ReturnType<
648
671
  typeof debounceAggregationChanges<ReplicationRangeIndexable<R>>
649
672
  >;
673
+ private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
674
+ private _recentRepairDispatch!: Map<string, Map<string, number>>;
675
+ private _repairSweepRunning!: boolean;
676
+ private _repairSweepForceFreshPending!: boolean;
677
+ private _repairSweepAddedPeersPending!: Set<string>;
650
678
 
651
679
  // regular distribution checks
652
680
  private distributeQueue?: PQueue;
@@ -663,6 +691,7 @@ export class SharedLog<
663
691
  waitForReplicatorRequestMaxAttempts?: number;
664
692
  waitForPruneDelay!: number;
665
693
  distributionDebounceTime!: number;
694
+ repairSweepTargetBufferSize!: number;
666
695
 
667
696
  replicationController!: PIDReplicationController;
668
697
  history!: { usedMemory: number; factor: number }[];
@@ -873,32 +902,34 @@ export class SharedLog<
873
902
  deliveryArg: false | true | DeliveryOptions | undefined,
874
903
  ): {
875
904
  delivery?: DeliveryOptions;
905
+ reliability: DeliveryReliability;
876
906
  requireRecipients: boolean;
877
- settleMin?: number;
907
+ minAcks?: number;
878
908
  wrap?: (promise: Promise<void>) => Promise<void>;
879
909
  } {
880
910
  const delivery: DeliveryOptions | undefined =
881
911
  deliveryArg === undefined || deliveryArg === false
882
912
  ? undefined
883
913
  : deliveryArg === true
884
- ? {}
914
+ ? { reliability: "ack" }
885
915
  : deliveryArg;
886
916
  if (!delivery) {
887
917
  return {
888
918
  delivery: undefined,
919
+ reliability: "best-effort",
889
920
  requireRecipients: false,
890
- settleMin: undefined,
921
+ minAcks: undefined,
891
922
  wrap: undefined,
892
923
  };
893
924
  }
894
925
 
895
- const deliverySettle = delivery.settle ?? true;
926
+ const reliability: DeliveryReliability = delivery.reliability ?? "ack";
896
927
  const deliveryTimeout = delivery.timeout;
897
928
  const deliverySignal = delivery.signal;
898
929
  const requireRecipients = delivery.requireRecipients === true;
899
- const settleMin =
900
- typeof deliverySettle === "object" && Number.isFinite(deliverySettle.min)
901
- ? Math.max(0, Math.floor(deliverySettle.min))
930
+ const minAcks =
931
+ delivery.minAcks != null && Number.isFinite(delivery.minAcks)
932
+ ? Math.max(0, Math.floor(delivery.minAcks))
902
933
  : undefined;
903
934
 
904
935
  const wrap =
@@ -967,12 +998,107 @@ export class SharedLog<
967
998
 
968
999
  return {
969
1000
  delivery,
1001
+ reliability,
970
1002
  requireRecipients,
971
- settleMin,
1003
+ minAcks,
972
1004
  wrap,
973
1005
  };
974
1006
  }
975
1007
 
1008
+ private async _getSortedRouteHints(
1009
+ targetHash: string,
1010
+ ): Promise<RouteHint[]> {
1011
+ const pubsub: any = this.node.services.pubsub as any;
1012
+ const maybeHints = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
1013
+ const hints: RouteHint[] = Array.isArray(maybeHints) ? maybeHints : [];
1014
+ const now = Date.now();
1015
+ return hints
1016
+ .filter((hint) => hint.expiresAt == null || hint.expiresAt > now)
1017
+ .sort((a, b) => {
1018
+ const rankA = a.kind === "directstream-ack" ? 0 : 1;
1019
+ const rankB = b.kind === "directstream-ack" ? 0 : 1;
1020
+ if (rankA !== rankB) {
1021
+ return rankA - rankB;
1022
+ }
1023
+
1024
+ const costA =
1025
+ a.kind === "directstream-ack"
1026
+ ? a.distance
1027
+ : Math.max(0, (a.route?.length ?? 1) - 1);
1028
+ const costB =
1029
+ b.kind === "directstream-ack"
1030
+ ? b.distance
1031
+ : Math.max(0, (b.route?.length ?? 1) - 1);
1032
+ if (costA !== costB) {
1033
+ return costA - costB;
1034
+ }
1035
+
1036
+ return (b.updatedAt ?? 0) - (a.updatedAt ?? 0);
1037
+ });
1038
+ }
1039
+
1040
+ private async _sendAckWithUnifiedHints(properties: {
1041
+ peer: string;
1042
+ message: ExchangeHeadsMessage<any>;
1043
+ payload: Uint8Array;
1044
+ fanoutUnicastOptions?: { timeoutMs?: number; signal?: AbortSignal };
1045
+ }): Promise<void> {
1046
+ const { peer, message, payload, fanoutUnicastOptions } = properties;
1047
+ const hints = await this._getSortedRouteHints(peer);
1048
+ const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
1049
+ const fanoutHint = hints.find(
1050
+ (hint): hint is Extract<RouteHint, { kind: "fanout-token" }> =>
1051
+ hint.kind === "fanout-token",
1052
+ );
1053
+
1054
+ if (hasDirectHint) {
1055
+ try {
1056
+ await this.rpc.send(message, {
1057
+ mode: new AcknowledgeDelivery({
1058
+ redundancy: 1,
1059
+ to: [peer],
1060
+ }),
1061
+ });
1062
+ return;
1063
+ } catch {
1064
+ // Fall back to fanout token/direct fanout unicast below.
1065
+ }
1066
+ }
1067
+
1068
+ if (fanoutHint && this._fanoutChannel) {
1069
+ try {
1070
+ await this._fanoutChannel.unicastAck(
1071
+ fanoutHint.route,
1072
+ payload,
1073
+ fanoutUnicastOptions,
1074
+ );
1075
+ return;
1076
+ } catch {
1077
+ // Fall back below.
1078
+ }
1079
+ }
1080
+
1081
+ if (this._fanoutChannel) {
1082
+ try {
1083
+ await this._fanoutChannel.unicastToAck(
1084
+ peer,
1085
+ payload,
1086
+ fanoutUnicastOptions,
1087
+ );
1088
+ return;
1089
+ } catch {
1090
+ // Fall back below.
1091
+ }
1092
+ }
1093
+
1094
+ await this.rpc.send(message, {
1095
+ mode: new AcknowledgeDelivery({
1096
+ redundancy: 1,
1097
+ to: [peer],
1098
+ }),
1099
+ });
1100
+ }
1101
+
976
1102
  private async _appendDeliverToReplicators(
977
1103
  entry: Entry<T>,
978
1104
  minReplicasValue: number,
@@ -981,7 +1107,7 @@ export class SharedLog<
981
1107
  isLeader: boolean,
982
1108
  deliveryArg: false | true | DeliveryOptions | undefined,
983
1109
  ) {
984
- const { delivery, requireRecipients, settleMin, wrap } =
1110
+ const { delivery, reliability, requireRecipients, minAcks, wrap } =
985
1111
  this._parseDeliveryOptions(deliveryArg);
986
1112
  const pending: Promise<void>[] = [];
987
1113
  const track = (promise: Promise<void>) => {
@@ -997,11 +1123,32 @@ export class SharedLog<
997
1123
  const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
998
1124
 
999
1125
  const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1000
- const hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1126
+ let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1127
+ const allowSubscriberFallback =
1128
+ this.syncronizer instanceof SimpleSyncronizer ||
1129
+ (this.compatibility ?? Number.MAX_VALUE) < 10;
1130
+ if (!hasRemotePeers && allowSubscriberFallback) {
1131
+ try {
1132
+ const subscribers = await this._getTopicSubscribers(this.topic);
1133
+ if (subscribers && subscribers.length > 0) {
1134
+ for (const subscriber of subscribers) {
1135
+ const hash = subscriber.hashcode();
1136
+ if (hash === selfHash) {
1137
+ continue;
1138
+ }
1139
+ set.add(hash);
1140
+ leadersForDelivery?.add(hash);
1141
+ }
1142
+ hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1143
+ }
1144
+ } catch {
1145
+ // Best-effort only; keep discovered recipients as-is.
1146
+ }
1147
+ }
1001
1148
  if (!hasRemotePeers) {
1002
1149
  if (requireRecipients) {
1003
- throw new NoPeersError(this.rpc.topic);
1004
- }
1150
+ throw new NoPeersError(this.rpc.topic);
1151
+ }
1005
1152
  continue;
1006
1153
  }
1007
1154
 
@@ -1037,8 +1184,13 @@ export class SharedLog<
1037
1184
  let silentTo: string[] | undefined;
1038
1185
  // Default delivery semantics: require enough remote ACKs to reach the requested
1039
1186
  // replication degree (local append counts as 1).
1040
- const ackLimit =
1041
- settleMin == null ? Math.max(0, minReplicasValue - 1) : settleMin;
1187
+ const defaultMinAcks = Math.max(0, minReplicasValue - 1);
1188
+ const ackLimitRaw =
1189
+ reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
1190
+ const ackLimit = Math.max(
1191
+ 0,
1192
+ Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length),
1193
+ );
1042
1194
 
1043
1195
  for (const peer of orderedRemoteRecipients) {
1044
1196
  if (ackTo.length < ackLimit) {
@@ -1061,48 +1213,11 @@ export class SharedLog<
1061
1213
  for (const peer of ackTo) {
1062
1214
  track(
1063
1215
  (async () => {
1064
- // Unified decision point:
1065
- // - If we can prove a cheap direct path (connected or routed), use it.
1066
- // - Otherwise, fall back to the fanout unicast ACK path (bounded overlay routing).
1067
- // - If that fails, fall back to pubsub/RPC routing which may flood to discover routes.
1068
- const pubsub: any = this.node.services.pubsub as any;
1069
- const canDirectFast =
1070
- Boolean(pubsub?.peers?.get?.(peer)?.isWritable) ||
1071
- Boolean(
1072
- pubsub?.routes?.isReachable?.(
1073
- pubsub?.publicKeyHash,
1074
- peer,
1075
- 0,
1076
- ),
1077
- );
1078
-
1079
- if (canDirectFast) {
1080
- await this.rpc.send(message, {
1081
- mode: new AcknowledgeDelivery({
1082
- redundancy: 1,
1083
- to: [peer],
1084
- }),
1085
- });
1086
- return;
1087
- }
1088
-
1089
- if (this._fanoutChannel) {
1090
- try {
1091
- await this._fanoutChannel.unicastToAck(
1092
- peer,
1093
- payload,
1094
- fanoutUnicastOptions,
1095
- );
1096
- return;
1097
- } catch {
1098
- // fall back below
1099
- }
1100
- }
1101
- await this.rpc.send(message, {
1102
- mode: new AcknowledgeDelivery({
1103
- redundancy: 1,
1104
- to: [peer],
1105
- }),
1216
+ await this._sendAckWithUnifiedHints({
1217
+ peer,
1218
+ message,
1219
+ payload,
1220
+ fanoutUnicastOptions,
1106
1221
  });
1107
1222
  })(),
1108
1223
  );
@@ -1723,6 +1838,14 @@ export class SharedLog<
1723
1838
  this.pendingMaturity.delete(keyHash);
1724
1839
  }
1725
1840
 
1841
+ // Keep local sync/prune state consistent even when a peer disappears
1842
+ // through replication-info updates without a topic unsubscribe event.
1843
+ this.removePeerFromGidPeerHistory(keyHash);
1844
+ this._recentRepairDispatch.delete(keyHash);
1845
+ if (!isMe) {
1846
+ this.syncronizer.onPeerDisconnected(keyHash);
1847
+ }
1848
+
1726
1849
  if (!isMe) {
1727
1850
  this.rebalanceParticipationDebounced?.call();
1728
1851
  }
@@ -2207,6 +2330,201 @@ export class SharedLog<
2207
2330
  return set;
2208
2331
  }
2209
2332
 
2333
+ private dispatchMaybeMissingEntries(
2334
+ target: string,
2335
+ entries: Map<string, EntryReplicated<any>>,
2336
+ options?: {
2337
+ bypassRecentDedupe?: boolean;
2338
+ retryScheduleMs?: number[];
2339
+ forceFreshDelivery?: boolean;
2340
+ },
2341
+ ) {
2342
+ if (entries.size === 0) {
2343
+ return;
2344
+ }
2345
+
2346
+ const now = Date.now();
2347
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2348
+ if (!recentlyDispatchedByHash) {
2349
+ recentlyDispatchedByHash = new Map();
2350
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2351
+ }
2352
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2353
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2354
+ recentlyDispatchedByHash.delete(hash);
2355
+ }
2356
+ }
2357
+
2358
+ const filteredEntries =
2359
+ options?.bypassRecentDedupe === true
2360
+ ? new Map(entries)
2361
+ : new Map<string, EntryReplicated<any>>();
2362
+ if (options?.bypassRecentDedupe !== true) {
2363
+ for (const [hash, entry] of entries) {
2364
+ const prev = recentlyDispatchedByHash.get(hash);
2365
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2366
+ continue;
2367
+ }
2368
+ recentlyDispatchedByHash.set(hash, now);
2369
+ filteredEntries.set(hash, entry);
2370
+ }
2371
+ } else {
2372
+ for (const hash of entries.keys()) {
2373
+ recentlyDispatchedByHash.set(hash, now);
2374
+ }
2375
+ }
2376
+ if (filteredEntries.size === 0) {
2377
+ return;
2378
+ }
2379
+
2380
+ const run = () =>
2381
+ Promise.resolve(
2382
+ this.syncronizer.onMaybeMissingEntries({
2383
+ entries: filteredEntries,
2384
+ targets: [target],
2385
+ }),
2386
+ ).catch((error: any) => logger.error(error));
2387
+
2388
+ const retrySchedule =
2389
+ options?.retryScheduleMs && options.retryScheduleMs.length > 0
2390
+ ? options.retryScheduleMs
2391
+ : options?.forceFreshDelivery
2392
+ ? FORCE_FRESH_RETRY_SCHEDULE_MS
2393
+ : [0];
2394
+
2395
+ for (const delayMs of retrySchedule) {
2396
+ if (delayMs === 0) {
2397
+ void run();
2398
+ continue;
2399
+ }
2400
+ const timer = setTimeout(() => {
2401
+ this._repairRetryTimers.delete(timer);
2402
+ if (this.closed) {
2403
+ return;
2404
+ }
2405
+ void run();
2406
+ }, delayMs);
2407
+ timer.unref?.();
2408
+ this._repairRetryTimers.add(timer);
2409
+ }
2410
+ }
2411
+
2412
+ private scheduleRepairSweep(options: {
2413
+ forceFreshDelivery: boolean;
2414
+ addedPeers: Set<string>;
2415
+ }) {
2416
+ if (options.forceFreshDelivery) {
2417
+ this._repairSweepForceFreshPending = true;
2418
+ }
2419
+ for (const peer of options.addedPeers) {
2420
+ this._repairSweepAddedPeersPending.add(peer);
2421
+ }
2422
+ if (!this._repairSweepRunning && !this.closed) {
2423
+ this._repairSweepRunning = true;
2424
+ void this.runRepairSweep();
2425
+ }
2426
+ }
2427
+
2428
+ private async runRepairSweep() {
2429
+ try {
2430
+ while (!this.closed) {
2431
+ const forceFreshDelivery = this._repairSweepForceFreshPending;
2432
+ const addedPeers = new Set(this._repairSweepAddedPeersPending);
2433
+ this._repairSweepForceFreshPending = false;
2434
+ this._repairSweepAddedPeersPending.clear();
2435
+
2436
+ if (!forceFreshDelivery && addedPeers.size === 0) {
2437
+ return;
2438
+ }
2439
+
2440
+ const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
2441
+ const flushTarget = (target: string) => {
2442
+ const entries = pendingByTarget.get(target);
2443
+ if (!entries || entries.size === 0) {
2444
+ return;
2445
+ }
2446
+ const isJoinWarmupTarget = addedPeers.has(target);
2447
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
2448
+ this.dispatchMaybeMissingEntries(target, entries, {
2449
+ bypassRecentDedupe,
2450
+ retryScheduleMs: isJoinWarmupTarget
2451
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
2452
+ : undefined,
2453
+ forceFreshDelivery,
2454
+ });
2455
+ pendingByTarget.delete(target);
2456
+ };
2457
+ const queueEntryForTarget = (
2458
+ target: string,
2459
+ entry: EntryReplicated<any>,
2460
+ ) => {
2461
+ let set = pendingByTarget.get(target);
2462
+ if (!set) {
2463
+ set = new Map();
2464
+ pendingByTarget.set(target, set);
2465
+ }
2466
+ if (set.has(entry.hash)) {
2467
+ return;
2468
+ }
2469
+ set.set(entry.hash, entry);
2470
+ if (set.size >= this.repairSweepTargetBufferSize) {
2471
+ flushTarget(target);
2472
+ }
2473
+ };
2474
+
2475
+ const iterator = this.entryCoordinatesIndex.iterate({});
2476
+ try {
2477
+ while (!this.closed && !iterator.done()) {
2478
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2479
+ for (const entry of entries) {
2480
+ const entryReplicated = entry.value;
2481
+ const currentPeers = await this.findLeaders(
2482
+ entryReplicated.coordinates,
2483
+ entryReplicated,
2484
+ { roleAge: 0 },
2485
+ );
2486
+ if (forceFreshDelivery) {
2487
+ for (const [currentPeer] of currentPeers) {
2488
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
2489
+ continue;
2490
+ }
2491
+ queueEntryForTarget(currentPeer, entryReplicated);
2492
+ }
2493
+ }
2494
+ if (addedPeers.size > 0) {
2495
+ for (const peer of addedPeers) {
2496
+ if (currentPeers.has(peer)) {
2497
+ queueEntryForTarget(peer, entryReplicated);
2498
+ }
2499
+ }
2500
+ }
2501
+ }
2502
+ }
2503
+ } finally {
2504
+ await iterator.close();
2505
+ }
2506
+
2507
+ for (const target of [...pendingByTarget.keys()]) {
2508
+ flushTarget(target);
2509
+ }
2510
+ }
2511
+ } catch (error: any) {
2512
+ if (!isNotStartedError(error)) {
2513
+ logger.error(`Repair sweep failed: ${error?.message ?? error}`);
2514
+ }
2515
+ } finally {
2516
+ this._repairSweepRunning = false;
2517
+ if (
2518
+ !this.closed &&
2519
+ (this._repairSweepForceFreshPending ||
2520
+ this._repairSweepAddedPeersPending.size > 0)
2521
+ ) {
2522
+ this._repairSweepRunning = true;
2523
+ void this.runRepairSweep();
2524
+ }
2525
+ }
2526
+ }
2527
+
2210
2528
  private async pruneDebouncedFnAddIfNotKeeping(args: {
2211
2529
  key: string;
2212
2530
  value: {
@@ -2428,10 +2746,15 @@ export class SharedLog<
2428
2746
  this._pendingIHave = new Map();
2429
2747
  this.latestReplicationInfoMessage = new Map();
2430
2748
  this._replicationInfoBlockedPeers = new Set();
2431
- this._replicationInfoRequestByPeer = new Map();
2432
- this._replicationInfoApplyQueueByPeer = new Map();
2433
- this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2434
- this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2749
+ this._replicationInfoRequestByPeer = new Map();
2750
+ this._replicationInfoApplyQueueByPeer = new Map();
2751
+ this._repairRetryTimers = new Set();
2752
+ this._recentRepairDispatch = new Map();
2753
+ this._repairSweepRunning = false;
2754
+ this._repairSweepForceFreshPending = false;
2755
+ this._repairSweepAddedPeersPending = new Set();
2756
+ this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2757
+ this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2435
2758
 
2436
2759
  this.uniqueReplicators = new Set();
2437
2760
  this._replicatorJoinEmitted = new Set();
@@ -2441,6 +2764,11 @@ export class SharedLog<
2441
2764
  this.oldestOpenTime = this.openTime;
2442
2765
  this.distributionDebounceTime =
2443
2766
  options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
2767
+ this.repairSweepTargetBufferSize = toPositiveInteger(
2768
+ options?.sync?.repairSweepTargetBufferSize,
2769
+ REPAIR_SWEEP_TARGET_BUFFER_SIZE,
2770
+ "sync.repairSweepTargetBufferSize",
2771
+ );
2444
2772
 
2445
2773
  this.timeUntilRoleMaturity =
2446
2774
  options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
@@ -3197,6 +3525,14 @@ export class SharedLog<
3197
3525
  "unsubscribe",
3198
3526
  this._onUnsubscriptionFn,
3199
3527
  );
3528
+ for (const timer of this._repairRetryTimers) {
3529
+ clearTimeout(timer);
3530
+ }
3531
+ this._repairRetryTimers.clear();
3532
+ this._recentRepairDispatch.clear();
3533
+ this._repairSweepRunning = false;
3534
+ this._repairSweepForceFreshPending = false;
3535
+ this._repairSweepAddedPeersPending.clear();
3200
3536
 
3201
3537
  for (const [_k, v] of this._pendingDeletes) {
3202
3538
  v.clear();
@@ -3390,7 +3726,6 @@ export class SharedLog<
3390
3726
  if (filteredHeads.length === 0) {
3391
3727
  return;
3392
3728
  }
3393
-
3394
3729
  const groupedByGid = await groupByGid(filteredHeads);
3395
3730
  const promises: Promise<void>[] = [];
3396
3731
 
@@ -4382,6 +4717,7 @@ export class SharedLog<
4382
4717
  const timeout = options.timeout ?? this.waitForReplicatorTimeout;
4383
4718
 
4384
4719
  return new Promise((resolve, reject) => {
4720
+ let settled = false;
4385
4721
  const removeListeners = () => {
4386
4722
  this.events.removeEventListener("replication:change", roleListener);
4387
4723
  this.events.removeEventListener("replicator:mature", roleListener); // TODO replication:change event ?
@@ -4390,15 +4726,26 @@ export class SharedLog<
4390
4726
  abortListener,
4391
4727
  );
4392
4728
  };
4393
- const abortListener = () => {
4729
+ const settleResolve = (value: Map<string, { intersecting: boolean }> | false) => {
4730
+ if (settled) return;
4731
+ settled = true;
4732
+ removeListeners();
4733
+ clearTimeout(timer);
4734
+ resolve(value);
4735
+ };
4736
+ const settleReject = (error: unknown) => {
4737
+ if (settled) return;
4738
+ settled = true;
4394
4739
  removeListeners();
4395
4740
  clearTimeout(timer);
4396
- resolve(false);
4741
+ reject(error);
4742
+ };
4743
+ const abortListener = () => {
4744
+ settleResolve(false);
4397
4745
  };
4398
4746
 
4399
4747
  const timer = setTimeout(async () => {
4400
- removeListeners();
4401
- resolve(false);
4748
+ settleResolve(false);
4402
4749
  }, timeout);
4403
4750
 
4404
4751
  const check = async () => {
@@ -4422,19 +4769,22 @@ export class SharedLog<
4422
4769
  }
4423
4770
  options?.onLeader && leaderKeys.forEach(options.onLeader);
4424
4771
 
4425
- removeListeners();
4426
- clearTimeout(timer);
4427
- resolve(leaders);
4772
+ settleResolve(leaders);
4773
+ };
4774
+ const runCheck = () => {
4775
+ void check().catch((error) => {
4776
+ settleReject(error);
4777
+ });
4428
4778
  };
4429
4779
 
4430
4780
  const roleListener = () => {
4431
- check();
4781
+ runCheck();
4432
4782
  };
4433
4783
 
4434
4784
  this.events.addEventListener("replication:change", roleListener); // TODO replication:change event ?
4435
4785
  this.events.addEventListener("replicator:mature", roleListener); // TODO replication:change event ?
4436
4786
  this._closeController.signal.addEventListener("abort", abortListener);
4437
- check();
4787
+ runCheck();
4438
4788
  });
4439
4789
  }
4440
4790
 
@@ -4649,8 +4999,8 @@ export class SharedLog<
4649
4999
  const selfHash = this.node.identity.publicKey.hashcode();
4650
5000
 
4651
5001
  // Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
4652
- // Falling back to live pubsub subscribers can include non-replicators and can
4653
- // break delivery/availability when writers are not directly connected.
5002
+ // If it is still warming up (for example, only contains self), supplement with
5003
+ // current subscribers until we have enough candidates for this decision.
4654
5004
  let peerFilter: Set<string> | undefined = undefined;
4655
5005
  const selfReplicating = await this.isReplicating();
4656
5006
  if (this.uniqueReplicators.size > 0) {
@@ -4660,6 +5010,22 @@ export class SharedLog<
4660
5010
  } else {
4661
5011
  peerFilter.delete(selfHash);
4662
5012
  }
5013
+
5014
+ try {
5015
+ const subscribers = await this._getTopicSubscribers(this.topic);
5016
+ if (subscribers && subscribers.length > 0) {
5017
+ for (const subscriber of subscribers) {
5018
+ peerFilter.add(subscriber.hashcode());
5019
+ }
5020
+ if (selfReplicating) {
5021
+ peerFilter.add(selfHash);
5022
+ } else {
5023
+ peerFilter.delete(selfHash);
5024
+ }
5025
+ }
5026
+ } catch {
5027
+ // Best-effort only; keep current peerFilter.
5028
+ }
4663
5029
  } else {
4664
5030
  try {
4665
5031
  const subscribers =
@@ -4810,9 +5176,20 @@ export class SharedLog<
4810
5176
  }
4811
5177
 
4812
5178
  if (!subscribed) {
5179
+ const wasReplicator = this.uniqueReplicators.has(peerHash);
5180
+ try {
5181
+ // Unsubscribe can race with the peer's final replication reset message.
5182
+ // Proactively evict its ranges so leader selection doesn't keep stale owners.
5183
+ await this.removeReplicator(publicKey, { noEvent: true });
5184
+ } catch (error) {
5185
+ if (!isNotStartedError(error as Error)) {
5186
+ throw error;
5187
+ }
5188
+ }
5189
+
4813
5190
  // Emit replicator:leave at most once per (join -> leave) transition, even if we
4814
5191
  // concurrently process unsubscribe + replication reset messages for the same peer.
4815
- const stoppedTransition = this.uniqueReplicators.delete(peerHash);
5192
+ const stoppedTransition = wasReplicator;
4816
5193
  this._replicatorJoinEmitted.delete(peerHash);
4817
5194
 
4818
5195
  this.cancelReplicationInfoRequests(peerHash);
@@ -5302,9 +5679,9 @@ export class SharedLog<
5302
5679
  * that we potentially need to share with other peers
5303
5680
  */
5304
5681
 
5305
- if (this.closed) {
5306
- return;
5307
- }
5682
+ if (this.closed) {
5683
+ return;
5684
+ }
5308
5685
 
5309
5686
  await this.log.trim();
5310
5687
 
@@ -5312,23 +5689,94 @@ export class SharedLog<
5312
5689
  ? (changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>[])
5313
5690
  : [changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>];
5314
5691
  const changes = batchedChanges.flat();
5692
+ const selfHash = this.node.identity.publicKey.hashcode();
5315
5693
  // On removed ranges (peer leaves / shrink), gid-level history can hide
5316
5694
  // per-entry gaps. Force a fresh delivery pass for reassigned entries.
5317
- const forceFreshDelivery = changes.some((change) => change.type === "removed");
5695
+ const forceFreshDelivery = changes.some(
5696
+ (change) => change.type === "removed" && change.range.hash !== selfHash,
5697
+ );
5318
5698
  const gidPeersHistorySnapshot = new Map<string, Set<string> | undefined>();
5699
+ const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
5700
+ for (const [target, hashes] of this._recentRepairDispatch) {
5701
+ for (const [hash, ts] of hashes) {
5702
+ if (ts <= dedupeCutoff) {
5703
+ hashes.delete(hash);
5704
+ }
5705
+ }
5706
+ if (hashes.size === 0) {
5707
+ this._recentRepairDispatch.delete(target);
5708
+ }
5709
+ }
5319
5710
 
5320
5711
  const changed = false;
5712
+ const replacedPeers = new Set<string>();
5713
+ for (const change of changes) {
5714
+ if (change.type === "replaced" && change.range.hash !== selfHash) {
5715
+ replacedPeers.add(change.range.hash);
5716
+ }
5717
+ }
5718
+ const addedPeers = new Set<string>();
5719
+ for (const change of changes) {
5720
+ if (change.type === "added" || change.type === "replaced") {
5721
+ const hash = change.range.hash;
5722
+ if (hash !== selfHash) {
5723
+ // Range updates can reassign entries to an existing peer shortly after it
5724
+ // already received a subset. Avoid suppressing legitimate follow-up repair.
5725
+ this._recentRepairDispatch.delete(hash);
5726
+ }
5727
+ }
5728
+ if (change.type === "added") {
5729
+ const hash = change.range.hash;
5730
+ if (hash !== selfHash && !replacedPeers.has(hash)) {
5731
+ addedPeers.add(hash);
5732
+ }
5733
+ }
5734
+ }
5321
5735
 
5322
5736
  try {
5323
5737
  const uncheckedDeliver: Map<
5324
5738
  string,
5325
5739
  Map<string, EntryReplicated<any>>
5326
5740
  > = new Map();
5741
+ const flushUncheckedDeliverTarget = (target: string) => {
5742
+ const entries = uncheckedDeliver.get(target);
5743
+ if (!entries || entries.size === 0) {
5744
+ return;
5745
+ }
5746
+ const isJoinWarmupTarget = addedPeers.has(target);
5747
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
5748
+ this.dispatchMaybeMissingEntries(target, entries, {
5749
+ bypassRecentDedupe,
5750
+ retryScheduleMs: isJoinWarmupTarget
5751
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
5752
+ : undefined,
5753
+ forceFreshDelivery,
5754
+ });
5755
+ uncheckedDeliver.delete(target);
5756
+ };
5757
+ const queueUncheckedDeliver = (
5758
+ target: string,
5759
+ entry: EntryReplicated<any>,
5760
+ ) => {
5761
+ let set = uncheckedDeliver.get(target);
5762
+ if (!set) {
5763
+ set = new Map();
5764
+ uncheckedDeliver.set(target, set);
5765
+ }
5766
+ if (set.has(entry.hash)) {
5767
+ return;
5768
+ }
5769
+ set.set(entry.hash, entry);
5770
+ if (set.size >= this.repairSweepTargetBufferSize) {
5771
+ flushUncheckedDeliverTarget(target);
5772
+ }
5773
+ };
5327
5774
 
5328
5775
  for await (const entryReplicated of toRebalance<R>(
5329
5776
  changes,
5330
5777
  this.entryCoordinatesIndex,
5331
5778
  this.recentlyRebalanced,
5779
+ { forceFresh: forceFreshDelivery },
5332
5780
  )) {
5333
5781
  if (this.closed) {
5334
5782
  break;
@@ -5356,24 +5804,16 @@ export class SharedLog<
5356
5804
  },
5357
5805
  );
5358
5806
 
5359
- for (const [currentPeer] of currentPeers) {
5360
- if (currentPeer === this.node.identity.publicKey.hashcode()) {
5361
- isLeader = true;
5362
- continue;
5363
- }
5364
-
5365
- if (!oldPeersSet?.has(currentPeer)) {
5366
- let set = uncheckedDeliver.get(currentPeer);
5367
- if (!set) {
5368
- set = new Map();
5369
- uncheckedDeliver.set(currentPeer, set);
5807
+ for (const [currentPeer] of currentPeers) {
5808
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
5809
+ isLeader = true;
5810
+ continue;
5370
5811
  }
5371
5812
 
5372
- if (!set.has(entryReplicated.hash)) {
5373
- set.set(entryReplicated.hash, entryReplicated);
5813
+ if (!oldPeersSet?.has(currentPeer)) {
5814
+ queueUncheckedDeliver(currentPeer, entryReplicated);
5374
5815
  }
5375
5816
  }
5376
- }
5377
5817
 
5378
5818
  if (oldPeersSet) {
5379
5819
  for (const oldPeer of oldPeersSet) {
@@ -5404,11 +5844,15 @@ export class SharedLog<
5404
5844
  this.removePruneRequestSent(entryReplicated.hash);
5405
5845
  }
5406
5846
  }
5407
- for (const [target, entries] of uncheckedDeliver) {
5408
- this.syncronizer.onMaybeMissingEntries({
5409
- entries,
5410
- targets: [target],
5411
- });
5847
+
5848
+ if (forceFreshDelivery || addedPeers.size > 0) {
5849
+ // Schedule a coalesced background sweep for churn/join windows instead of
5850
+ // scanning the whole index synchronously on each replication change.
5851
+ this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5852
+ }
5853
+
5854
+ for (const target of [...uncheckedDeliver.keys()]) {
5855
+ flushUncheckedDeliverTarget(target);
5412
5856
  }
5413
5857
 
5414
5858
  return changed;
@@ -5422,51 +5866,52 @@ export class SharedLog<
5422
5866
  }
5423
5867
  }
5424
5868
 
5425
- async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
5426
- logger.trace(
5427
- `Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(
5428
- evt.detail.topics.map((x) => x),
5429
- )} '`,
5430
- );
5431
- if (!evt.detail.topics.includes(this.topic)) {
5432
- return;
5433
- }
5869
+ async _onUnsubscription(evt: CustomEvent<UnsubcriptionEvent>) {
5870
+ logger.trace(
5871
+ `Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(
5872
+ evt.detail.topics.map((x) => x),
5873
+ )} '`,
5874
+ );
5875
+ if (!evt.detail.topics.includes(this.topic)) {
5876
+ return;
5877
+ }
5434
5878
 
5435
- const fromHash = evt.detail.from.hashcode();
5436
- this._replicationInfoBlockedPeers.add(fromHash);
5879
+ const fromHash = evt.detail.from.hashcode();
5880
+ this._replicationInfoBlockedPeers.add(fromHash);
5881
+ this._recentRepairDispatch.delete(fromHash);
5882
+
5883
+ // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
5884
+ // prevents late/out-of-order replication-info messages from re-introducing
5885
+ // stale segments for a peer that has already left the topic.
5886
+ const now = BigInt(+new Date());
5887
+ const prev = this.latestReplicationInfoMessage.get(fromHash);
5888
+ if (!prev || prev < now) {
5889
+ this.latestReplicationInfoMessage.set(fromHash, now);
5890
+ }
5437
5891
 
5438
- // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
5439
- // prevents late/out-of-order replication-info messages from re-introducing
5440
- // stale segments for a peer that has already left the topic.
5441
- const now = BigInt(+new Date());
5442
- const prev = this.latestReplicationInfoMessage.get(fromHash);
5443
- if (!prev || prev < now) {
5444
- this.latestReplicationInfoMessage.set(fromHash, now);
5445
- }
5892
+ return this.handleSubscriptionChange(
5893
+ evt.detail.from,
5894
+ evt.detail.topics,
5895
+ false,
5896
+ );
5897
+ }
5446
5898
 
5447
- return this.handleSubscriptionChange(
5448
- evt.detail.from,
5449
- evt.detail.topics,
5450
- false,
5451
- );
5899
+ async _onSubscription(evt: CustomEvent<SubscriptionEvent>) {
5900
+ logger.trace(
5901
+ `New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(
5902
+ evt.detail.topics.map((x) => x),
5903
+ )}'`,
5904
+ );
5905
+ if (!evt.detail.topics.includes(this.topic)) {
5906
+ return;
5452
5907
  }
5453
5908
 
5454
- async _onSubscription(evt: CustomEvent<SubscriptionEvent>) {
5455
- logger.trace(
5456
- `New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(
5457
- evt.detail.topics.map((x) => x),
5458
- )}'`,
5459
- );
5460
- if (!evt.detail.topics.includes(this.topic)) {
5461
- return;
5462
- }
5463
-
5464
- this.remoteBlocks.onReachable(evt.detail.from);
5465
- this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
5909
+ this.remoteBlocks.onReachable(evt.detail.from);
5910
+ this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
5466
5911
 
5467
- return this.handleSubscriptionChange(
5468
- evt.detail.from,
5469
- evt.detail.topics,
5912
+ await this.handleSubscriptionChange(
5913
+ evt.detail.from,
5914
+ evt.detail.topics,
5470
5915
  true,
5471
5916
  );
5472
5917
  }