@peerbit/shared-log 12.3.5 → 13.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/sync-batch-sweep.d.ts +2 -0
- package/dist/benchmark/sync-batch-sweep.d.ts.map +1 -0
- package/dist/benchmark/sync-batch-sweep.js +305 -0
- package/dist/benchmark/sync-batch-sweep.js.map +1 -0
- package/dist/src/fanout-envelope.d.ts +18 -0
- package/dist/src/fanout-envelope.d.ts.map +1 -0
- package/dist/src/fanout-envelope.js +85 -0
- package/dist/src/fanout-envelope.js.map +1 -0
- package/dist/src/index.d.ts +55 -6
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1595 -339
- package/dist/src/index.js.map +1 -1
- package/dist/src/pid.d.ts.map +1 -1
- package/dist/src/pid.js +21 -5
- package/dist/src/pid.js.map +1 -1
- package/dist/src/ranges.d.ts +3 -1
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +14 -5
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/index.d.ts +45 -1
- package/dist/src/sync/index.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts +13 -2
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +194 -3
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/dist/src/sync/simple.d.ts +24 -3
- package/dist/src/sync/simple.d.ts.map +1 -1
- package/dist/src/sync/simple.js +330 -32
- package/dist/src/sync/simple.js.map +1 -1
- package/package.json +16 -16
- package/src/fanout-envelope.ts +27 -0
- package/src/index.ts +2162 -691
- package/src/pid.ts +22 -4
- package/src/ranges.ts +14 -4
- package/src/sync/index.ts +53 -1
- package/src/sync/rateless-iblt.ts +237 -4
- package/src/sync/simple.ts +427 -41
package/src/index.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import { BorshError, field, variant } from "@dao-xyz/borsh";
|
|
1
|
+
import { BorshError, deserialize, field, serialize, variant } from "@dao-xyz/borsh";
|
|
2
2
|
import { AnyBlockStore, RemoteBlocks } from "@peerbit/blocks";
|
|
3
3
|
import { cidifyString } from "@peerbit/blocks-interface";
|
|
4
4
|
import { Cache } from "@peerbit/cache";
|
|
5
5
|
import {
|
|
6
6
|
AccessError,
|
|
7
7
|
PublicSignKey,
|
|
8
|
+
getPublicKeyFromPeerId,
|
|
8
9
|
sha256Base64Sync,
|
|
9
10
|
sha256Sync,
|
|
10
11
|
} from "@peerbit/crypto";
|
|
@@ -31,7 +32,16 @@ import {
|
|
|
31
32
|
} from "@peerbit/log";
|
|
32
33
|
import { logger as loggerFn } from "@peerbit/logger";
|
|
33
34
|
import { ClosedError, Program, type ProgramEvents } from "@peerbit/program";
|
|
34
|
-
import {
|
|
35
|
+
import {
|
|
36
|
+
FanoutChannel,
|
|
37
|
+
type FanoutProviderHandle,
|
|
38
|
+
type FanoutTree,
|
|
39
|
+
type FanoutTreeChannelOptions,
|
|
40
|
+
type FanoutTreeDataEvent,
|
|
41
|
+
type FanoutTreeUnicastEvent,
|
|
42
|
+
type FanoutTreeJoinOptions,
|
|
43
|
+
waitForSubscribers,
|
|
44
|
+
} from "@peerbit/pubsub";
|
|
35
45
|
import {
|
|
36
46
|
SubscriptionEvent,
|
|
37
47
|
UnsubcriptionEvent,
|
|
@@ -40,10 +50,11 @@ import { RPC, type RequestContext } from "@peerbit/rpc";
|
|
|
40
50
|
import {
|
|
41
51
|
AcknowledgeDelivery,
|
|
42
52
|
AnyWhere,
|
|
53
|
+
DataMessage,
|
|
54
|
+
MessageHeader,
|
|
43
55
|
NotStartedError,
|
|
44
|
-
|
|
56
|
+
type RouteHint,
|
|
45
57
|
SilentDelivery,
|
|
46
|
-
type WithMode,
|
|
47
58
|
} from "@peerbit/stream-interface";
|
|
48
59
|
import {
|
|
49
60
|
AbortError,
|
|
@@ -69,6 +80,7 @@ import {
|
|
|
69
80
|
ResponseIPrune,
|
|
70
81
|
createExchangeHeadsMessages,
|
|
71
82
|
} from "./exchange-heads.js";
|
|
83
|
+
import { FanoutEnvelope } from "./fanout-envelope.js";
|
|
72
84
|
import {
|
|
73
85
|
MAX_U32,
|
|
74
86
|
MAX_U64,
|
|
@@ -189,6 +201,36 @@ const getLatestEntry = (
|
|
|
189
201
|
return latest;
|
|
190
202
|
};
|
|
191
203
|
|
|
204
|
+
/**
 * Derive a deterministic unsigned 32-bit seed from a string.
 *
 * Applies the FNV-1a 32-bit recurrence (xor, then multiply by the FNV prime)
 * to each UTF-16 code unit of the input, as returned by `charCodeAt`.
 *
 * @param str - Text to hash. The empty string yields the FNV offset basis.
 * @returns The hash as a non-negative integer below 2^32.
 */
const hashToSeed32 = (str: string) => {
	const FNV_OFFSET_BASIS = 0x811c9dc5;
	const FNV_PRIME = 0x01000193;

	let acc = FNV_OFFSET_BASIS;
	let index = 0;
	while (index < str.length) {
		// Math.imul keeps the multiplication in 32-bit integer arithmetic.
		acc = Math.imul(acc ^ str.charCodeAt(index), FNV_PRIME);
		index += 1;
	}

	// Reinterpret the signed 32-bit accumulator as an unsigned value.
	return acc >>> 0;
};
|
|
213
|
+
|
|
214
|
+
/**
 * Deterministically pick up to `max` distinct peers from `peers`.
 *
 * When `peers` already fits within `max` the input array itself is returned
 * unchanged. Otherwise indices are drawn from a xorshift32 generator seeded by
 * `seed`, and already-chosen peers are skipped (rejection sampling), so the
 * same `(peers, seed, max)` always yields the same subset in the same order.
 *
 * @param peers - Candidate peer identifiers.
 * @param seed - Generator seed; only its int32 truncation is used, and a
 *   truncation of zero is replaced by 1.
 * @param max - Maximum number of peers to return.
 * @returns `peers` itself when `peers.length <= max`, otherwise a new array of
 *   distinct peers whose length is `max` capped at the number of distinct peers.
 */
const pickDeterministicSubset = (peers: string[], seed: number, max: number) => {
	if (peers.length <= max) return peers;

	// Never aim for more distinct peers than exist: with duplicates in `peers`
	// the sampling loop below could otherwise never reach `max` and spin forever.
	const target = Math.min(max, new Set(peers).size);

	const subset: string[] = [];
	const used = new Set<string>();

	// xorshift32 works on int32 state and is stuck at 0 once the state is 0.
	// Truncate first so seeds like 2**32 or 0.5 (int32 value 0) also fall back
	// to 1. Every seed with a non-zero truncation keeps its previous sequence.
	let x = (seed | 0) || 1;
	while (subset.length < target) {
		// xorshift32
		x ^= x << 13;
		x ^= x >>> 17;
		x ^= x << 5;
		const peer = peers[(x >>> 0) % peers.length];
		if (!used.has(peer)) {
			used.add(peer);
			subset.push(peer);
		}
	}
	return subset;
};
|
|
233
|
+
|
|
192
234
|
export type ReplicationLimitsOptions =
|
|
193
235
|
| Partial<ReplicationLimits>
|
|
194
236
|
| { min?: number; max?: number };
|
|
@@ -373,6 +415,7 @@ export type SharedLogOptions<
|
|
|
373
415
|
compatibility?: number;
|
|
374
416
|
domain?: ReplicationDomainConstructor<D>;
|
|
375
417
|
eagerBlocks?: boolean | { cacheSize?: number };
|
|
418
|
+
fanout?: SharedLogFanoutOptions;
|
|
376
419
|
};
|
|
377
420
|
|
|
378
421
|
export const DEFAULT_MIN_REPLICAS = 2;
|
|
@@ -385,6 +428,10 @@ export const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;
|
|
|
385
428
|
// Prefer making pruning robust without timing-based heuristics.
|
|
386
429
|
export const WAIT_FOR_PRUNE_DELAY = 0;
|
|
387
430
|
const PRUNE_DEBOUNCE_INTERVAL = 500;
|
|
431
|
+
const CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS = 250;
|
|
432
|
+
const CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS = 5_000;
|
|
433
|
+
const CHECKED_PRUNE_RETRY_MAX_ATTEMPTS = 3;
|
|
434
|
+
const CHECKED_PRUNE_RETRY_MAX_DELAY_MS = 30_000;
|
|
388
435
|
|
|
389
436
|
// Do not set this any lower: the PID controller becomes unstable when the system cannot respond quickly enough to the controller's updates.
|
|
390
437
|
const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
|
|
@@ -394,6 +441,36 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
|
|
|
394
441
|
const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
|
|
395
442
|
|
|
396
443
|
const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
|
|
444
|
+
const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
|
|
445
|
+
const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
|
|
446
|
+
const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
|
|
447
|
+
const FORCE_FRESH_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
|
|
448
|
+
const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000];
|
|
449
|
+
|
|
450
|
+
/**
 * Normalize an optional numeric setting to an integer of at least 1.
 *
 * @param value - The configured value, or `undefined`/`null` when not set.
 * @param fallback - Returned as-is when `value` is not set.
 * @param label - Name of the setting, used as the prefix of the error message.
 * @returns `fallback` when `value` is not set, otherwise `value` rounded down
 *   and raised to 1 if the rounding produced 0.
 * @throws Error when `value` is set but is not a finite number greater than 0.
 */
const toPositiveInteger = (
	value: number | undefined,
	fallback: number,
	label: string,
) => {
	if (value == null) return fallback;

	const isUsable = Number.isFinite(value) && value > 0;
	if (!isUsable) {
		throw new Error(`${label} must be a positive number`);
	}

	// Values in (0, 1) floor to 0, so lift them to the minimum of 1.
	const floored = Math.floor(value);
	return floored < 1 ? 1 : floored;
};
|
|
463
|
+
|
|
464
|
+
const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS: Omit<
|
|
465
|
+
FanoutTreeChannelOptions,
|
|
466
|
+
"role"
|
|
467
|
+
> = {
|
|
468
|
+
msgRate: 30,
|
|
469
|
+
msgSize: 1024,
|
|
470
|
+
uploadLimitBps: 5_000_000,
|
|
471
|
+
maxChildren: 24,
|
|
472
|
+
repair: true,
|
|
473
|
+
};
|
|
397
474
|
|
|
398
475
|
const getIdForDynamicRange = (publicKey: PublicSignKey) => {
|
|
399
476
|
return sha256Sync(
|
|
@@ -417,20 +494,39 @@ export type Args<
|
|
|
417
494
|
: "u32",
|
|
418
495
|
> = LogProperties<T> & LogEvents<T> & SharedLogOptions<T, D, R>;
|
|
419
496
|
|
|
497
|
+
export type DeliveryReliability = "ack" | "best-effort";
|
|
498
|
+
|
|
420
499
|
export type DeliveryOptions = {
|
|
421
|
-
|
|
500
|
+
reliability?: DeliveryReliability;
|
|
501
|
+
minAcks?: number;
|
|
422
502
|
requireRecipients?: boolean;
|
|
423
503
|
timeout?: number;
|
|
424
504
|
signal?: AbortSignal;
|
|
425
505
|
};
|
|
426
506
|
|
|
427
|
-
export type
|
|
507
|
+
export type SharedLogFanoutOptions = {
|
|
508
|
+
root?: string;
|
|
509
|
+
channel?: Partial<Omit<FanoutTreeChannelOptions, "role">>;
|
|
510
|
+
join?: FanoutTreeJoinOptions;
|
|
511
|
+
};
|
|
512
|
+
|
|
513
|
+
type SharedAppendBaseOptions<T> = AppendOptions<T> & {
|
|
428
514
|
replicas?: AbsoluteReplicas | number;
|
|
429
515
|
replicate?: boolean;
|
|
430
|
-
target?: "all" | "replicators" | "none";
|
|
431
|
-
delivery?: false | true | DeliveryOptions;
|
|
432
516
|
};
|
|
433
517
|
|
|
518
|
+
export type SharedAppendOptions<T> =
|
|
519
|
+
| (SharedAppendBaseOptions<T> & {
|
|
520
|
+
target?: "replicators" | "none";
|
|
521
|
+
delivery?: false | true | DeliveryOptions;
|
|
522
|
+
})
|
|
523
|
+
| (SharedAppendBaseOptions<T> & {
|
|
524
|
+
// target=all uses the fanout data plane and intentionally does not expose
|
|
525
|
+
// per-recipient settle semantics from RPC delivery options.
|
|
526
|
+
target: "all";
|
|
527
|
+
delivery?: false | undefined;
|
|
528
|
+
});
|
|
529
|
+
|
|
434
530
|
export type ReplicatorJoinEvent = { publicKey: PublicSignKey };
|
|
435
531
|
export type ReplicatorLeaveEvent = { publicKey: PublicSignKey };
|
|
436
532
|
export type ReplicationChangeEvent = { publicKey: PublicSignKey };
|
|
@@ -463,11 +559,12 @@ export class SharedLog<
|
|
|
463
559
|
|
|
464
560
|
private _replicationRangeIndex!: Index<ReplicationRangeIndexable<R>>;
|
|
465
561
|
private _entryCoordinatesIndex!: Index<EntryReplicated<R>>;
|
|
466
|
-
|
|
467
|
-
|
|
562
|
+
private coordinateToHash!: Cache<string>;
|
|
563
|
+
private recentlyRebalanced!: Cache<string>;
|
|
468
564
|
|
|
469
|
-
|
|
470
|
-
|
|
565
|
+
uniqueReplicators!: Set<string>;
|
|
566
|
+
private _replicatorJoinEmitted!: Set<string>;
|
|
567
|
+
private _replicatorsReconciled!: boolean;
|
|
471
568
|
|
|
472
569
|
/* private _totalParticipation!: number; */
|
|
473
570
|
|
|
@@ -476,6 +573,10 @@ export class SharedLog<
|
|
|
476
573
|
|
|
477
574
|
private _onSubscriptionFn!: (arg: any) => any;
|
|
478
575
|
private _onUnsubscriptionFn!: (arg: any) => any;
|
|
576
|
+
private _onFanoutDataFn?: (arg: any) => void;
|
|
577
|
+
private _onFanoutUnicastFn?: (arg: any) => void;
|
|
578
|
+
private _fanoutChannel?: FanoutChannel;
|
|
579
|
+
private _providerHandle?: FanoutProviderHandle;
|
|
479
580
|
|
|
480
581
|
private _isTrustedReplicator?: (
|
|
481
582
|
publicKey: PublicSignKey,
|
|
@@ -519,6 +620,15 @@ export class SharedLog<
|
|
|
519
620
|
>; // map of peerId to timeout
|
|
520
621
|
|
|
521
622
|
private latestReplicationInfoMessage!: Map<string, bigint>;
|
|
623
|
+
// Peers that have unsubscribed from this log's topic. We ignore replication-info
|
|
624
|
+
// messages from them until we see a new subscription, to avoid re-introducing
|
|
625
|
+
// stale membership state during close/unsubscribe races.
|
|
626
|
+
private _replicationInfoBlockedPeers!: Set<string>;
|
|
627
|
+
private _replicationInfoRequestByPeer!: Map<
|
|
628
|
+
string,
|
|
629
|
+
{ attempts: number; timer?: ReturnType<typeof setTimeout> }
|
|
630
|
+
>;
|
|
631
|
+
private _replicationInfoApplyQueueByPeer!: Map<string, Promise<void>>;
|
|
522
632
|
|
|
523
633
|
private remoteBlocks!: RemoteBlocks;
|
|
524
634
|
|
|
@@ -552,10 +662,19 @@ export class SharedLog<
|
|
|
552
662
|
|
|
553
663
|
private _requestIPruneSent!: Map<string, Set<string>>; // tracks entry hash to peer hash for requesting I prune messages
|
|
554
664
|
private _requestIPruneResponseReplicatorSet!: Map<string, Set<string>>; // tracks entry hash to peer hash
|
|
665
|
+
private _checkedPruneRetries!: Map<
|
|
666
|
+
string,
|
|
667
|
+
{ attempts: number; timer?: ReturnType<typeof setTimeout> }
|
|
668
|
+
>;
|
|
555
669
|
|
|
556
670
|
private replicationChangeDebounceFn!: ReturnType<
|
|
557
671
|
typeof debounceAggregationChanges<ReplicationRangeIndexable<R>>
|
|
558
672
|
>;
|
|
673
|
+
private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
|
|
674
|
+
private _recentRepairDispatch!: Map<string, Map<string, number>>;
|
|
675
|
+
private _repairSweepRunning!: boolean;
|
|
676
|
+
private _repairSweepForceFreshPending!: boolean;
|
|
677
|
+
private _repairSweepAddedPeersPending!: Set<string>;
|
|
559
678
|
|
|
560
679
|
// regular distribution checks
|
|
561
680
|
private distributeQueue?: PQueue;
|
|
@@ -572,6 +691,7 @@ export class SharedLog<
|
|
|
572
691
|
waitForReplicatorRequestMaxAttempts?: number;
|
|
573
692
|
waitForPruneDelay!: number;
|
|
574
693
|
distributionDebounceTime!: number;
|
|
694
|
+
repairSweepTargetBufferSize!: number;
|
|
575
695
|
|
|
576
696
|
replicationController!: PIDReplicationController;
|
|
577
697
|
history!: { usedMemory: number; factor: number }[];
|
|
@@ -597,6 +717,676 @@ export class SharedLog<
|
|
|
597
717
|
return (this.compatibility ?? Number.MAX_VALUE) < 9;
|
|
598
718
|
}
|
|
599
719
|
|
|
720
|
+
/**
 * Build the fanout channel options for this log.
 *
 * Starts from `DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS` and lets any keys in
 * `options.channel` override the defaults.
 *
 * @param options - Fanout configuration; may be omitted.
 * @returns A new options object (without `role`).
 */
private getFanoutChannelOptions(
	options?: SharedLogFanoutOptions,
): Omit<FanoutTreeChannelOptions, "role"> {
	const overrides = options?.channel ?? {};
	return {
		...DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS,
		...overrides,
	};
}
|
|
728
|
+
|
|
729
|
+
/**
 * Open the fanout channel for this log's topic.
 *
 * Any previously opened channel is closed first. When `options` is omitted
 * nothing else happens. Otherwise the root is taken from `options.root` or,
 * failing that, resolved through the fanout service's topic-root control
 * plane (when it exposes one). The channel is opened as root when the resolved
 * root equals the fanout service's own `publicKeyHash`, and joined otherwise.
 *
 * @param options - Fanout configuration; omit to leave fanout closed.
 * @throws Error when no fanout service is available on the node, or when no
 *   root was provided and none could be resolved. Errors from opening/joining
 *   are rethrown after the channel has been closed again.
 */
private async _openFanoutChannel(options?: SharedLogFanoutOptions) {
	this._closeFanoutChannel();
	if (!options) {
		return;
	}

	const fanoutService = (this.node.services as any).fanout;
	if (!fanoutService) {
		throw new Error(
			`Fanout is configured for shared-log topic ${this.topic}, but no fanout service is available on this client`,
		);
	}

	// An explicitly configured root wins; otherwise ask the control plane.
	const resolvedRoot =
		options.root ??
		(await (fanoutService as any)?.topicRootControlPlane?.resolveTopicRoot?.(
			this.topic,
		));
	if (!resolvedRoot) {
		throw new Error(
			`Fanout is configured for shared-log topic ${this.topic}, but no fanout root was provided and none could be resolved`,
		);
	}

	const channel = new FanoutChannel(fanoutService, {
		topic: this.topic,
		root: resolvedRoot,
	});
	this._fanoutChannel = channel;

	// The listener functions are created once and reused across re-opens so
	// that _closeFanoutChannel can remove exactly what was added.
	if (!this._onFanoutDataFn) {
		this._onFanoutDataFn = (evt: any) => {
			const detail = (evt as CustomEvent<FanoutTreeDataEvent>)?.detail;
			if (detail) {
				void this._onFanoutData(detail).catch((error) => logger.error(error));
			}
		};
	}
	channel.addEventListener("data", this._onFanoutDataFn);

	if (!this._onFanoutUnicastFn) {
		this._onFanoutUnicastFn = (evt: any) => {
			const detail = (evt as CustomEvent<FanoutTreeUnicastEvent>)?.detail;
			if (detail) {
				void this._onFanoutUnicast(detail).catch((error) => logger.error(error));
			}
		};
	}
	channel.addEventListener("unicast", this._onFanoutUnicastFn);

	try {
		const channelOptions = this.getFanoutChannelOptions(options);
		const isRoot = resolvedRoot === fanoutService.publicKeyHash;
		if (isRoot) {
			await channel.openAsRoot(channelOptions);
		} else {
			await channel.join(channelOptions, options.join);
		}
	} catch (error) {
		// Do not keep a half-opened channel (or its listeners) around.
		this._closeFanoutChannel();
		throw error;
	}
}
|
|
793
|
+
|
|
794
|
+
/**
 * Detach the fanout listeners, close the current fanout channel (if any) and
 * clear the channel reference. Safe to call when no channel is open.
 */
private _closeFanoutChannel() {
	const channel = this._fanoutChannel;
	if (channel) {
		const onData = this._onFanoutDataFn;
		if (onData) {
			channel.removeEventListener("data", onData);
		}

		const onUnicast = this._onFanoutUnicastFn;
		if (onUnicast) {
			channel.removeEventListener("unicast", onUnicast);
		}

		channel.close();
	}
	this._fanoutChannel = undefined;
}
|
|
809
|
+
|
|
810
|
+
/**
 * Handle a broadcast received on the fanout channel.
 *
 * The payload is decoded as a `FanoutEnvelope` and the envelope payload as a
 * `TransportMessage`. Payloads that fail to decode with a `BorshError`, and
 * messages that are not `ExchangeHeadsMessage`, are ignored. Accepted messages
 * are forwarded to `onMessage` with a synthetic `DataMessage` context carrying
 * the envelope timestamp.
 *
 * @param detail - The fanout data event.
 * @throws Any non-`BorshError` decoding error, and whatever `onMessage` throws.
 */
private async _onFanoutData(detail: FanoutTreeDataEvent) {
	let envelope: FanoutEnvelope;
	try {
		envelope = deserialize(detail.payload, FanoutEnvelope);
	} catch (error) {
		if (!(error instanceof BorshError)) {
			throw error;
		}
		return;
	}

	let message: TransportMessage;
	try {
		message = deserialize(envelope.payload, TransportMessage);
	} catch (error) {
		if (!(error instanceof BorshError)) {
			throw error;
		}
		return;
	}

	if (!(message instanceof ExchangeHeadsMessage)) {
		return;
	}

	// When the sender's key cannot be resolved, fall back to a stand-in that
	// only answers hashcode() with the hash carried in the envelope.
	const resolvedSender = await this._resolvePublicKeyFromHash(envelope.from);
	const from =
		resolvedSender ?? ({ hashcode: () => envelope.from } as PublicSignKey);

	const header = new MessageHeader({
		session: 0,
		mode: new AnyWhere(),
		priority: 0,
	});
	const contextMessage = new DataMessage({ header });
	contextMessage.header.timestamp = envelope.timestamp;

	await this.onMessage(message, { from, message: contextMessage });
}
|
|
853
|
+
|
|
854
|
+
/**
 * Handle a unicast received on the fanout channel.
 *
 * The payload is decoded directly as a `TransportMessage` (payloads failing
 * with a `BorshError` are ignored) and forwarded to `onMessage`. The sender is
 * taken from `detail.origin`, falling back to `detail.from` when the origin is
 * falsy, and the context message carries `detail.timestamp`.
 *
 * @param detail - The fanout unicast event.
 * @throws Any non-`BorshError` decoding error, and whatever `onMessage` throws.
 */
private async _onFanoutUnicast(detail: FanoutTreeUnicastEvent) {
	let message: TransportMessage;
	try {
		message = deserialize(detail.payload, TransportMessage);
	} catch (error) {
		if (!(error instanceof BorshError)) {
			throw error;
		}
		return;
	}

	const fromHash = detail.origin || detail.from;
	// Unresolvable senders are represented by a stand-in exposing hashcode() only.
	const resolvedSender = await this._resolvePublicKeyFromHash(fromHash);
	const from = resolvedSender ?? ({ hashcode: () => fromHash } as PublicSignKey);

	const header = new MessageHeader({
		session: 0,
		mode: new AnyWhere(),
		priority: 0,
	});
	const contextMessage = new DataMessage({ header });
	contextMessage.header.timestamp = detail.timestamp;

	await this.onMessage(message, { from, message: contextMessage });
}
|
|
884
|
+
|
|
885
|
+
/**
 * Broadcast an exchange-heads message over the fanout channel.
 *
 * The message is serialized into a `FanoutEnvelope` stamped with this node's
 * public key hash and the current time in milliseconds, and the serialized
 * envelope is published on the channel.
 *
 * @param message - The message to broadcast.
 * @throws Error when no fanout channel is open.
 */
private async _publishExchangeHeadsViaFanout(
	message: ExchangeHeadsMessage<any>,
): Promise<void> {
	const channel = this._fanoutChannel;
	if (!channel) {
		throw new Error(
			`No fanout channel configured for shared-log topic ${this.topic}`,
		);
	}

	const envelope = new FanoutEnvelope({
		from: this.node.identity.publicKey.hashcode(),
		timestamp: BigInt(Date.now()),
		payload: serialize(message),
	});
	const bytes = serialize(envelope);
	await channel.publish(bytes);
}
|
|
900
|
+
|
|
901
|
+
/**
 * Normalize the `delivery` append argument.
 *
 * - `undefined` / `false`: no delivery tracking; reliability is "best-effort".
 * - `true`: equivalent to `{ reliability: "ack" }`.
 * - an options object: used as given, with `reliability` defaulting to "ack".
 *
 * `minAcks` is floored and clamped to be non-negative when it is a finite
 * number, otherwise it is `undefined`. `wrap` is only provided when a timeout
 * or abort signal was configured; it races the given promise against them and
 * rejects with `TimeoutError` or `AbortError` respectively.
 *
 * @param deliveryArg - The raw `delivery` option.
 * @returns The normalized delivery settings.
 */
private _parseDeliveryOptions(
	deliveryArg: false | true | DeliveryOptions | undefined,
): {
	delivery?: DeliveryOptions;
	reliability: DeliveryReliability;
	requireRecipients: boolean;
	minAcks?: number;
	wrap?: (promise: Promise<void>) => Promise<void>;
} {
	let delivery: DeliveryOptions | undefined;
	if (deliveryArg === true) {
		delivery = { reliability: "ack" };
	} else if (deliveryArg !== undefined && deliveryArg !== false) {
		delivery = deliveryArg;
	}

	if (!delivery) {
		return {
			delivery: undefined,
			reliability: "best-effort",
			requireRecipients: false,
			minAcks: undefined,
			wrap: undefined,
		};
	}

	const { timeout: deliveryTimeout, signal: deliverySignal } = delivery;

	const requestedAcks = delivery.minAcks;
	const minAcks =
		requestedAcks != null && Number.isFinite(requestedAcks)
			? Math.max(0, Math.floor(requestedAcks))
			: undefined;

	let wrap: ((promise: Promise<void>) => Promise<void>) | undefined;
	if (deliveryTimeout != null || deliverySignal != null) {
		wrap = (promise: Promise<void>) =>
			new Promise<void>((resolve, reject) => {
				let settled = false;
				let timer: ReturnType<typeof setTimeout> | undefined = undefined;

				// Release the timer and the abort listener.
				const cleanup = () => {
					if (timer != null) {
						clearTimeout(timer);
						timer = undefined;
					}
					deliverySignal?.removeEventListener("abort", onAbort);
				};

				// Settle the outer promise at most once. `detach` is set when we
				// give up on `promise` early: a no-op catch is attached so a later
				// rejection of it does not surface as unhandled.
				const settleOnce = (finish: () => void, detach: boolean) => {
					if (settled) {
						return;
					}
					settled = true;
					if (detach) {
						promise.catch(() => {});
					}
					cleanup();
					finish();
				};

				const onAbort = () => {
					settleOnce(() => reject(new AbortError()), true);
				};

				if (deliverySignal) {
					if (deliverySignal.aborted) {
						// Already aborted: reject immediately, start no timer.
						onAbort();
						return;
					}
					deliverySignal.addEventListener("abort", onAbort);
				}

				if (deliveryTimeout != null) {
					timer = setTimeout(() => {
						settleOnce(
							() => reject(new TimeoutError(`Timeout waiting for delivery`)),
							true,
						);
					}, deliveryTimeout);
				}

				promise
					.then(() => {
						settleOnce(() => resolve(), false);
					})
					.catch((error) => {
						settleOnce(() => reject(error), false);
					});
			});
	}

	return {
		delivery,
		reliability: delivery.reliability ?? "ack",
		requireRecipients: delivery.requireRecipients === true,
		minAcks,
		wrap,
	};
}
|
|
1007
|
+
|
|
1008
|
+
/**
 * Fetch the route hints known for `targetHash` on this log's topic, drop the
 * expired ones and order the rest by preference.
 *
 * Hints come from `pubsub.getUnifiedRouteHints` when the pubsub service
 * provides it; anything other than an array is treated as "no hints".
 * Ordering: "directstream-ack" hints first; then lower cost (`distance` for
 * direct hints, `route.length - 1` floored at 0 for others); then the most
 * recently updated.
 *
 * @param targetHash - Hash of the peer to find routes to.
 * @returns The unexpired hints, most preferred first.
 */
private async _getSortedRouteHints(
	targetHash: string,
): Promise<RouteHint[]> {
	const pubsub: any = this.node.services.pubsub as any;
	const maybeHints = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
	const hints: RouteHint[] = Array.isArray(maybeHints) ? maybeHints : [];
	const now = Date.now();

	const rankOf = (hint: RouteHint) =>
		hint.kind === "directstream-ack" ? 0 : 1;
	const costOf = (hint: RouteHint) =>
		hint.kind === "directstream-ack"
			? hint.distance
			: Math.max(0, (hint.route?.length ?? 1) - 1);

	const unexpired = hints.filter(
		(hint) => hint.expiresAt == null || hint.expiresAt > now,
	);
	return unexpired.sort((left, right) => {
		const rankDelta = rankOf(left) - rankOf(right);
		if (rankDelta !== 0) {
			return rankDelta;
		}

		const leftCost = costOf(left);
		const rightCost = costOf(right);
		if (leftCost !== rightCost) {
			return leftCost - rightCost;
		}

		// Tie-break: newer hints first; a missing updatedAt counts as 0.
		return (right.updatedAt ?? 0) - (left.updatedAt ?? 0);
	});
}
|
|
1039
|
+
|
|
1040
|
+
/**
 * Deliver `message` to a single peer with acknowledgement, trying transports
 * in order until one succeeds:
 *
 * 1. RPC with `AcknowledgeDelivery`, only if a "directstream-ack" hint exists.
 * 2. Fanout `unicastAck` along the route of a "fanout-token" hint, if one
 *    exists and a fanout channel is open.
 * 3. Fanout `unicastToAck` addressed to the peer, if a fanout channel is open.
 * 4. RPC with `AcknowledgeDelivery` as the last resort; only this attempt's
 *    failure propagates to the caller.
 *
 * @param properties.peer - Hash of the recipient.
 * @param properties.message - Message used for the RPC attempts.
 * @param properties.payload - Serialized bytes used for the fanout attempts.
 * @param properties.fanoutUnicastOptions - Timeout/abort options passed to the
 *   fanout attempts.
 */
private async _sendAckWithUnifiedHints(properties: {
	peer: string;
	message: ExchangeHeadsMessage<any>;
	payload: Uint8Array;
	fanoutUnicastOptions?: { timeoutMs?: number; signal?: AbortSignal };
}): Promise<void> {
	const { peer, message, payload, fanoutUnicastOptions } = properties;

	const sendViaRpc = () =>
		this.rpc.send(message, {
			mode: new AcknowledgeDelivery({
				redundancy: 1,
				to: [peer],
			}),
		});

	const hints = await this._getSortedRouteHints(peer);
	const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
	const fanoutHint = hints.find(
		(hint): hint is Extract<RouteHint, { kind: "fanout-token" }> =>
			hint.kind === "fanout-token",
	);

	if (hasDirectHint) {
		try {
			await sendViaRpc();
			return;
		} catch {
			// Fall back to fanout token/direct fanout unicast below.
		}
	}

	if (fanoutHint) {
		// The channel is looked up again at each step because it may have been
		// closed or replaced while an earlier attempt was awaited.
		const routedChannel = this._fanoutChannel;
		if (routedChannel) {
			try {
				await routedChannel.unicastAck(
					fanoutHint.route,
					payload,
					fanoutUnicastOptions,
				);
				return;
			} catch {
				// Fall back below.
			}
		}
	}

	const addressedChannel = this._fanoutChannel;
	if (addressedChannel) {
		try {
			await addressedChannel.unicastToAck(peer, payload, fanoutUnicastOptions);
			return;
		} catch {
			// Fall back below.
		}
	}

	await sendViaRpc();
}
|
|
1101
|
+
|
|
1102
|
+
/**
 * Deliver a freshly appended entry to its replicators.
 *
 * For every exchange-heads message produced for `entry`:
 * - leaders of referenced gids are merged into `leaders`, and the leaders are
 *   recorded in the gid peer history;
 * - if that yields no remote peer and the subscriber fallback is allowed
 *   (simple synchronizer, or compatibility below 10), topic subscribers are
 *   added as recipients on a best-effort basis;
 * - without delivery options the message is sent fire-and-forget (silent when
 *   this node is a leader, acknowledged otherwise);
 * - with delivery options the first `ackLimit` recipients (leaders first) get
 *   an acknowledged send that is awaited before returning, and the remaining
 *   recipients get a fire-and-forget silent send.
 *
 * @param entry - The appended entry.
 * @param minReplicasValue - Requested replication degree; the default number
 *   of remote acks is this value minus one (the local append counts as one).
 * @param leaders - Leaders found for the entry; extended in place.
 * @param selfHash - Public key hash of this node.
 * @param isLeader - Whether this node is itself a leader for the entry.
 * @param deliveryArg - The raw `delivery` append option.
 * @throws NoPeersError when recipients are required but none are available.
 *   Failures of acknowledged sends (including timeout/abort) also propagate.
 */
private async _appendDeliverToReplicators(
	entry: Entry<T>,
	minReplicasValue: number,
	leaders: Map<string, any>,
	selfHash: string,
	isLeader: boolean,
	deliveryArg: false | true | DeliveryOptions | undefined,
) {
	const parsed = this._parseDeliveryOptions(deliveryArg);
	const { delivery, reliability, requireRecipients, minAcks, wrap } = parsed;

	const pending: Promise<void>[] = [];
	const track = (promise: Promise<void>) => {
		pending.push(wrap ? wrap(promise) : promise);
	};

	let fanoutUnicastOptions:
		| { timeoutMs?: number; signal?: AbortSignal }
		| undefined;
	if (delivery?.timeout != null || delivery?.signal != null) {
		fanoutUnicastOptions = {
			timeoutMs: delivery.timeout,
			signal: delivery.signal,
		};
	}

	const messages = createExchangeHeadsMessages(this.log, [entry]);
	for await (const message of messages) {
		await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
		const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;

		const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
		// True when the history holds at least one peer other than this node.
		const reachesRemotePeer = () => set.size > (set.has(selfHash) ? 1 : 0);

		let hasRemotePeers = reachesRemotePeer();
		const allowSubscriberFallback =
			this.syncronizer instanceof SimpleSyncronizer ||
			(this.compatibility ?? Number.MAX_VALUE) < 10;
		if (!hasRemotePeers && allowSubscriberFallback) {
			try {
				const subscribers = await this._getTopicSubscribers(this.topic);
				if (subscribers && subscribers.length > 0) {
					for (const subscriber of subscribers) {
						const hash = subscriber.hashcode();
						if (hash !== selfHash) {
							set.add(hash);
							leadersForDelivery?.add(hash);
						}
					}
					hasRemotePeers = reachesRemotePeer();
				}
			} catch {
				// Best-effort only; keep discovered recipients as-is.
			}
		}

		if (!hasRemotePeers) {
			if (requireRecipients) {
				throw new NoPeersError(this.rpc.topic);
			}
			continue;
		}

		if (!delivery) {
			const mode = isLeader
				? new SilentDelivery({ redundancy: 1, to: set })
				: new AcknowledgeDelivery({ redundancy: 1, to: set });
			this.rpc.send(message, { mode }).catch((error) => logger.error(error));
			continue;
		}

		// Leaders first, then the other known peers of the gid, without
		// duplicates and without this node.
		const orderedRemoteRecipients = [
			...new Set<string>([...leadersForDelivery!, ...set]),
		].filter((peer) => peer !== selfHash);

		// Default delivery semantics: require enough remote ACKs to reach the requested
		// replication degree (local append counts as 1).
		const defaultMinAcks = Math.max(0, minReplicasValue - 1);
		const ackLimitRaw =
			reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
		const ackLimit = Math.max(
			0,
			Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length),
		);

		const ackTo = orderedRemoteRecipients.slice(0, ackLimit);
		const remainder = orderedRemoteRecipients.slice(ackTo.length);
		const silentTo = remainder.length > 0 ? remainder : undefined;

		// ackTo and silentTo partition the recipients, so one emptiness check
		// covers both lists.
		if (requireRecipients && orderedRemoteRecipients.length === 0) {
			throw new NoPeersError(this.rpc.topic);
		}

		if (ackTo.length > 0) {
			// Serialize once and share the bytes across all acknowledged sends.
			const payload = serialize(message);
			for (const peer of ackTo) {
				track(
					this._sendAckWithUnifiedHints({
						peer,
						message,
						payload,
						fanoutUnicastOptions,
					}),
				);
			}
		}

		if (silentTo?.length) {
			this.rpc
				.send(message, {
					mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
				})
				.catch((error) => logger.error(error));
		}
	}

	if (pending.length > 0) {
		await Promise.all(pending);
	}
}
|
|
1240
|
+
|
|
1241
|
+
/**
 * Add the leaders of every gid referenced by the first head of `message` to
 * `leaders`.
 *
 * For each referenced gid, the current heads are looked up in the entry index;
 * each head's coordinates are read (or created with `minReplicasValue` when
 * none exist) and the leaders found for them are copied into `leaders`,
 * overwriting entries with the same key.
 *
 * @param message - The exchange-heads message being delivered.
 * @param minReplicasValue - Replication degree used when coordinates must be
 *   created.
 * @param leaders - Map to extend in place.
 */
private async _mergeLeadersFromGidReferences(
	message: ExchangeHeadsMessage<any>,
	minReplicasValue: number,
	leaders: Map<string, any>,
) {
	const gidReferences = message.heads[0]?.gidRefrences;
	if (!gidReferences?.length) {
		return;
	}

	for (const gidReference of gidReferences) {
		const headsOfGid = this.log.entryIndex.getHeads(gidReference, false);
		for (const gidEntry of await headsOfGid.all()) {
			const coordinates =
				(await this.getCoordinates(gidEntry)) ??
				(await this.createCoordinates(gidEntry, minReplicasValue));

			const found = await this._findLeaders(coordinates);
			for (const [key, value] of found) {
				leaders.set(key, value);
			}
		}
	}
}
|
|
1266
|
+
|
|
1267
|
+
/**
 * Publish `entry` through the fanout channel, one exchange-heads message at a
 * time. Each publish is awaited before the next message is produced.
 */
private async _appendDeliverToAllFanout(entry: Entry<T>) {
	const headsMessages = createExchangeHeadsMessages(this.log, [entry]);
	for await (const headsMessage of headsMessages) {
		await this._publishExchangeHeadsViaFanout(headsMessage);
	}
}
|
|
1272
|
+
|
|
1273
|
+
/**
 * Resolve a public key from its hash, asking the optional fanout service first
 * and falling back to the pubsub service.
 *
 * @param hash public-key hash to resolve
 * @returns the key, or `undefined` when neither service knows it
 */
private async _resolvePublicKeyFromHash(
	hash: string,
): Promise<PublicSignKey | undefined> {
	const fanoutService = (this.node.services as any).fanout;
	// The fanout service is untyped here, so its lookup may be synchronous or
	// promise-returning. Await it before applying `??`: a pending promise is never
	// nullish and would otherwise suppress the pubsub fallback.
	const fromFanout: PublicSignKey | undefined =
		await fanoutService?.getPublicKey?.(hash);
	return fromFanout ?? this.node.services.pubsub.getPublicKey(hash);
}
|
|
1282
|
+
|
|
1283
|
+
/**
 * Collect a bounded (at most 64) list of remote peers to address for `topic`.
 *
 * When a fanout channel exists and `topic` is this log's topic or its RPC topic,
 * the channel's peer hashes are used. Otherwise candidates are gathered, in
 * order, from fanout provider discovery, the pubsub service's peer map and the
 * connection manager's open connections. The local identity is always excluded.
 *
 * @param topic topic to find peers for
 * @returns resolved public keys of the candidates (possibly empty)
 */
private async _getTopicSubscribers(
	topic: string,
): Promise<PublicSignKey[] | undefined> {
	const maxPeers = 64;
	const selfHash = this.node.identity.publicKey.hashcode();

	// Resolve hashes to keys; drop unresolved keys, ourselves and duplicates.
	const resolveRemoteKeys = async (
		hashes: string[],
	): Promise<PublicSignKey[]> => {
		const resolved = await Promise.all(
			hashes.map((hash) => this._resolvePublicKeyFromHash(hash)),
		);
		const keys: PublicSignKey[] = [];
		const seen = new Set<string>();
		for (const key of resolved) {
			if (!key) continue;
			const keyHash = key.hashcode();
			if (keyHash === selfHash || seen.has(keyHash)) continue;
			seen.add(keyHash);
			keys.push(key);
		}
		return keys;
	};

	// Prefer the bounded peer set we already know from the fanout overlay.
	if (this._fanoutChannel && (topic === this.topic || topic === this.rpc.topic)) {
		const hashes = this._fanoutChannel
			.getPeerHashes({ includeSelf: false })
			.slice(0, maxPeers);
		if (hashes.length === 0) return [];
		return resolveRemoteKeys(hashes);
	}

	// Candidates are kept in a Set so a hash reported by several sources only
	// counts once towards the bound (insertion order is preserved).
	const candidates = new Set<string>();
	const isFull = () => candidates.size >= maxPeers;
	const offer = (hash: string | null | undefined) => {
		if (hash && hash !== selfHash) {
			candidates.add(hash);
		}
	};

	// Best-effort provider discovery (bounded). This requires bootstrap trackers.
	try {
		const fanoutService = (this.node.services as any).fanout;
		if (fanoutService?.queryProviders) {
			const ns = `shared-log|${this.topic}`;
			const seed = hashToSeed32(topic);
			const providers: string[] = await fanoutService.queryProviders(ns, {
				want: maxPeers,
				seed,
			});
			for (const h of providers ?? []) {
				if (isFull()) break;
				offer(h);
			}
		}
	} catch {
		// Best-effort only.
	}

	// Next, use already-connected peer streams (bounded and cheap).
	const peerMap: Map<string, unknown> | undefined = (this.node.services.pubsub as any)
		?.peers;
	if (peerMap?.keys) {
		for (const h of peerMap.keys()) {
			if (isFull()) break;
			offer(h);
		}
	}

	// Finally, fall back to libp2p connections (e.g. bootstrap peers) without requiring
	// any global topic membership view.
	if (!isFull()) {
		const connectionManager = (this.node.services.pubsub as any)?.components
			?.connectionManager;
		const connections = connectionManager?.getConnections?.() ?? [];
		for (const conn of connections) {
			if (isFull()) break;
			const peerId = conn?.remotePeer;
			if (!peerId) continue;
			try {
				offer(getPublicKeyFromPeerId(peerId).hashcode());
			} catch {
				// Best-effort only.
			}
		}
	}

	if (candidates.size === 0) return [];
	return resolveRemoteKeys([...candidates]);
}
|
|
1389
|
+
|
|
600
1390
|
// @deprecated
|
|
601
1391
|
private async getRole() {
|
|
602
1392
|
const segments = await this.getMyReplicationSegments();
|
|
@@ -1004,8 +1794,9 @@ export class SharedLog<
|
|
|
1004
1794
|
})
|
|
1005
1795
|
.all();
|
|
1006
1796
|
|
|
1007
|
-
|
|
1008
|
-
|
|
1797
|
+
this.uniqueReplicators.delete(keyHash);
|
|
1798
|
+
this._replicatorJoinEmitted.delete(keyHash);
|
|
1799
|
+
await this.replicationIndex.del({ query: { hash: keyHash } });
|
|
1009
1800
|
|
|
1010
1801
|
await this.updateOldestTimestampFromIndex();
|
|
1011
1802
|
|
|
@@ -1030,14 +1821,14 @@ export class SharedLog<
|
|
|
1030
1821
|
}
|
|
1031
1822
|
}
|
|
1032
1823
|
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1824
|
+
const timestamp = BigInt(+new Date());
|
|
1825
|
+
for (const x of deleted) {
|
|
1826
|
+
this.replicationChangeDebounceFn.add({
|
|
1827
|
+
range: x.value,
|
|
1828
|
+
type: "removed",
|
|
1829
|
+
timestamp,
|
|
1830
|
+
});
|
|
1831
|
+
}
|
|
1041
1832
|
|
|
1042
1833
|
const pendingMaturity = this.pendingMaturity.get(keyHash);
|
|
1043
1834
|
if (pendingMaturity) {
|
|
@@ -1047,6 +1838,14 @@ export class SharedLog<
|
|
|
1047
1838
|
this.pendingMaturity.delete(keyHash);
|
|
1048
1839
|
}
|
|
1049
1840
|
|
|
1841
|
+
// Keep local sync/prune state consistent even when a peer disappears
|
|
1842
|
+
// through replication-info updates without a topic unsubscribe event.
|
|
1843
|
+
this.removePeerFromGidPeerHistory(keyHash);
|
|
1844
|
+
this._recentRepairDispatch.delete(keyHash);
|
|
1845
|
+
if (!isMe) {
|
|
1846
|
+
this.syncronizer.onPeerDisconnected(keyHash);
|
|
1847
|
+
}
|
|
1848
|
+
|
|
1050
1849
|
if (!isMe) {
|
|
1051
1850
|
this.rebalanceParticipationDebounced?.call();
|
|
1052
1851
|
}
|
|
@@ -1118,9 +1917,10 @@ export class SharedLog<
|
|
|
1118
1917
|
{ query: { hash: from.hashcode() } },
|
|
1119
1918
|
{ shape: { id: true } },
|
|
1120
1919
|
);
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1920
|
+
if ((await otherSegmentsIterator.next(1)).length === 0) {
|
|
1921
|
+
this.uniqueReplicators.delete(from.hashcode());
|
|
1922
|
+
this._replicatorJoinEmitted.delete(from.hashcode());
|
|
1923
|
+
}
|
|
1124
1924
|
await otherSegmentsIterator.close();
|
|
1125
1925
|
|
|
1126
1926
|
await this.updateOldestTimestampFromIndex();
|
|
@@ -1160,6 +1960,7 @@ export class SharedLog<
|
|
|
1160
1960
|
|
|
1161
1961
|
let diffs: ReplicationChanges<ReplicationRangeIndexable<R>>;
|
|
1162
1962
|
let deleted: ReplicationRangeIndexable<R>[] | undefined = undefined;
|
|
1963
|
+
let isStoppedReplicating = false;
|
|
1163
1964
|
if (reset) {
|
|
1164
1965
|
deleted = (
|
|
1165
1966
|
await this.replicationIndex
|
|
@@ -1198,6 +1999,7 @@ export class SharedLog<
|
|
|
1198
1999
|
}
|
|
1199
2000
|
|
|
1200
2001
|
isNewReplicator = prevCount === 0 && ranges.length > 0;
|
|
2002
|
+
isStoppedReplicating = prevCount > 0 && ranges.length === 0;
|
|
1201
2003
|
} else {
|
|
1202
2004
|
let batchSize = 100;
|
|
1203
2005
|
let existing: ReplicationRangeIndexable<R>[] = [];
|
|
@@ -1281,7 +2083,16 @@ export class SharedLog<
|
|
|
1281
2083
|
diffs = changes;
|
|
1282
2084
|
}
|
|
1283
2085
|
|
|
1284
|
-
|
|
2086
|
+
const fromHash = from.hashcode();
|
|
2087
|
+
// Track replicator membership transitions synchronously so join/leave events are
|
|
2088
|
+
// idempotent even if we process concurrent reset messages/unsubscribes.
|
|
2089
|
+
const stoppedTransition =
|
|
2090
|
+
ranges.length === 0 ? this.uniqueReplicators.delete(fromHash) : false;
|
|
2091
|
+
if (ranges.length === 0) {
|
|
2092
|
+
this._replicatorJoinEmitted.delete(fromHash);
|
|
2093
|
+
} else {
|
|
2094
|
+
this.uniqueReplicators.add(fromHash);
|
|
2095
|
+
}
|
|
1285
2096
|
|
|
1286
2097
|
let now = +new Date();
|
|
1287
2098
|
let minRoleAge = await this.getDefaultMinRoleAge();
|
|
@@ -1327,13 +2138,13 @@ export class SharedLog<
|
|
|
1327
2138
|
}),
|
|
1328
2139
|
);
|
|
1329
2140
|
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
2141
|
+
if (rebalance && diff.range.mode !== ReplicationIntent.Strict) {
|
|
2142
|
+
// TODO this statement (might) cause issues with triggering pruning if the segment is strict and maturity timings will affect the outcome of rebalancing
|
|
2143
|
+
this.replicationChangeDebounceFn.add({
|
|
2144
|
+
...diff,
|
|
2145
|
+
matured: true,
|
|
2146
|
+
}); // we need to call this here because the outcom of findLeaders will be different when some ranges become mature, i.e. some of data we own might be prunable!
|
|
2147
|
+
}
|
|
1337
2148
|
pendingRanges.delete(diff.range.idString);
|
|
1338
2149
|
if (pendingRanges.size === 0) {
|
|
1339
2150
|
this.pendingMaturity.delete(diff.range.hash);
|
|
@@ -1379,28 +2190,39 @@ export class SharedLog<
|
|
|
1379
2190
|
}),
|
|
1380
2191
|
);
|
|
1381
2192
|
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
2193
|
+
if (isNewReplicator) {
|
|
2194
|
+
if (!this._replicatorJoinEmitted.has(fromHash)) {
|
|
2195
|
+
this._replicatorJoinEmitted.add(fromHash);
|
|
2196
|
+
this.events.dispatchEvent(
|
|
2197
|
+
new CustomEvent<ReplicatorJoinEvent>("replicator:join", {
|
|
2198
|
+
detail: { publicKey: from },
|
|
2199
|
+
}),
|
|
2200
|
+
);
|
|
2201
|
+
}
|
|
1388
2202
|
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
2203
|
+
if (isAllMature) {
|
|
2204
|
+
this.events.dispatchEvent(
|
|
2205
|
+
new CustomEvent<ReplicatorMatureEvent>("replicator:mature", {
|
|
2206
|
+
detail: { publicKey: from },
|
|
1393
2207
|
}),
|
|
1394
2208
|
);
|
|
1395
2209
|
}
|
|
1396
2210
|
}
|
|
1397
2211
|
|
|
1398
|
-
if (
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
2212
|
+
if (isStoppedReplicating && stoppedTransition) {
|
|
2213
|
+
this.events.dispatchEvent(
|
|
2214
|
+
new CustomEvent<ReplicatorLeaveEvent>("replicator:leave", {
|
|
2215
|
+
detail: { publicKey: from },
|
|
2216
|
+
}),
|
|
2217
|
+
);
|
|
1402
2218
|
}
|
|
1403
2219
|
|
|
2220
|
+
if (rebalance) {
|
|
2221
|
+
for (const diff of diffs) {
|
|
2222
|
+
this.replicationChangeDebounceFn.add(diff);
|
|
2223
|
+
}
|
|
2224
|
+
}
|
|
2225
|
+
|
|
1404
2226
|
if (!from.equals(this.node.identity.publicKey)) {
|
|
1405
2227
|
this.rebalanceParticipationDebounced?.call();
|
|
1406
2228
|
}
|
|
@@ -1432,6 +2254,20 @@ export class SharedLog<
|
|
|
1432
2254
|
if (change) {
|
|
1433
2255
|
let addedOrReplaced = change.filter((x) => x.type !== "removed");
|
|
1434
2256
|
if (addedOrReplaced.length > 0) {
|
|
2257
|
+
// Provider discovery keep-alive (best-effort). This enables bounded targeted fetches
|
|
2258
|
+
// without relying on any global subscriber list.
|
|
2259
|
+
try {
|
|
2260
|
+
const fanoutService = (this.node.services as any).fanout;
|
|
2261
|
+
if (fanoutService?.provide && !this._providerHandle) {
|
|
2262
|
+
this._providerHandle = fanoutService.provide(`shared-log|${this.topic}`, {
|
|
2263
|
+
ttlMs: 120_000,
|
|
2264
|
+
announceIntervalMs: 60_000,
|
|
2265
|
+
});
|
|
2266
|
+
}
|
|
2267
|
+
} catch {
|
|
2268
|
+
// Best-effort only.
|
|
2269
|
+
}
|
|
2270
|
+
|
|
1435
2271
|
let message:
|
|
1436
2272
|
| AllReplicatingSegmentsMessage
|
|
1437
2273
|
| AddedReplicationSegmentMessage
|
|
@@ -1494,6 +2330,218 @@ export class SharedLog<
|
|
|
1494
2330
|
return set;
|
|
1495
2331
|
}
|
|
1496
2332
|
|
|
2333
|
+
/**
 * Hand `entries` to the synchronizer as "maybe missing" for a single `target`,
 * optionally repeating the dispatch on a retry schedule.
 *
 * Per target, the time each entry hash was last dispatched is remembered.
 * Unless `bypassRecentDedupe` is true, hashes dispatched within
 * `RECENT_REPAIR_DISPATCH_TTL_MS` are skipped.
 *
 * @param target hash of the peer to dispatch to
 * @param entries entries keyed by hash
 * @param options.bypassRecentDedupe dispatch everything, ignoring the recent-dispatch record
 * @param options.retryScheduleMs delays (ms) at which to dispatch; `0` means immediately
 * @param options.forceFreshDelivery use the simple synchronizer when the active one is
 *   rateless-IBLT, and default to `FORCE_FRESH_RETRY_SCHEDULE_MS` when no schedule is given
 */
private dispatchMaybeMissingEntries(
	target: string,
	entries: Map<string, EntryReplicated<R>>,
	options?: {
		bypassRecentDedupe?: boolean;
		retryScheduleMs?: number[];
		forceFreshDelivery?: boolean;
	},
) {
	if (entries.size === 0) {
		return;
	}

	const now = Date.now();
	let recentByHash = this._recentRepairDispatch.get(target);
	if (!recentByHash) {
		recentByHash = new Map();
		this._recentRepairDispatch.set(target, recentByHash);
	}

	// Forget dispatch records that have outlived the TTL.
	for (const [hash, dispatchedAt] of recentByHash) {
		if (now - dispatchedAt > RECENT_REPAIR_DISPATCH_TTL_MS) {
			recentByHash.delete(hash);
		}
	}

	let toDispatch: Map<string, EntryReplicated<any>>;
	if (options?.bypassRecentDedupe === true) {
		// Everything goes out; still record the dispatch time for later calls.
		toDispatch = new Map(entries);
		for (const hash of entries.keys()) {
			recentByHash.set(hash, now);
		}
	} else {
		toDispatch = new Map<string, EntryReplicated<any>>();
		for (const [hash, entry] of entries) {
			const lastDispatch = recentByHash.get(hash);
			const dispatchedRecently =
				lastDispatch != null &&
				now - lastDispatch <= RECENT_REPAIR_DISPATCH_TTL_MS;
			if (!dispatchedRecently) {
				recentByHash.set(hash, now);
				toDispatch.set(hash, entry);
			}
		}
	}
	if (toDispatch.size === 0) {
		return;
	}

	const run = () => {
		const properties = { entries: toDispatch, targets: [target] };
		// For force-fresh churn repair we intentionally bypass rateless IBLT and
		// use simple hash-based sync. This path is a directed "push these hashes
		// to that peer" recovery flow; using simple sync here avoids occasional
		// single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
		const pending =
			options?.forceFreshDelivery &&
			this.syncronizer instanceof RatelessIBLTSynchronizer
				? this.syncronizer.simple.onMaybeMissingEntries(properties)
				: this.syncronizer.onMaybeMissingEntries(properties);
		return Promise.resolve(pending).catch((error: any) => logger.error(error));
	};

	const customSchedule = options?.retryScheduleMs;
	const retrySchedule =
		customSchedule && customSchedule.length > 0
			? customSchedule
			: options?.forceFreshDelivery
				? FORCE_FRESH_RETRY_SCHEDULE_MS
				: [0];

	for (const delayMs of retrySchedule) {
		if (delayMs !== 0) {
			// Delayed attempt: the timer is tracked in `_repairRetryTimers` and
			// does nothing if the log was closed in the meantime.
			const timer = setTimeout(() => {
				this._repairRetryTimers.delete(timer);
				if (!this.closed) {
					void run();
				}
			}, delayMs);
			timer.unref?.();
			this._repairRetryTimers.add(timer);
		} else {
			void run();
		}
	}
}
|
|
2428
|
+
|
|
2429
|
+
/**
 * Record a repair-sweep request and start the sweep loop if it is not already
 * running. Requests accumulate in the pending flag and pending peer set, which
 * `runRepairSweep` reads.
 *
 * @param options.forceFreshDelivery when true, sets the pending force-fresh flag
 * @param options.addedPeers peer hashes added to the pending set
 */
private scheduleRepairSweep(options: {
	forceFreshDelivery: boolean;
	addedPeers: Set<string>;
}) {
	const { forceFreshDelivery, addedPeers } = options;

	if (forceFreshDelivery) {
		this._repairSweepForceFreshPending = true;
	}
	addedPeers.forEach((peer) => {
		this._repairSweepAddedPeersPending.add(peer);
	});

	// A running sweep re-checks the pending state itself; never start on a closed log.
	if (this._repairSweepRunning || this.closed) {
		return;
	}
	this._repairSweepRunning = true;
	void this.runRepairSweep();
}
|
|
2444
|
+
|
|
2445
|
+
/**
 * Drain pending repair-sweep requests.
 *
 * Each pass takes a snapshot of the pending force-fresh flag and pending added
 * peers, clears them, then iterates the entry-coordinates index in batches of
 * `REPAIR_SWEEP_ENTRY_BATCH_SIZE`. For every entry the current leaders are
 * computed with `roleAge: 0`; the entry is queued for every remote leader when
 * force-fresh is requested, and for every added peer that is a leader.
 * Per-target buffers are flushed through `dispatchMaybeMissingEntries` when
 * they reach `repairSweepTargetBufferSize` and once more at the end of the pass.
 *
 * Errors other than "not started" are logged. On exit the running flag is
 * cleared and a new sweep is started if more work was requested meanwhile.
 */
private async runRepairSweep() {
	try {
		while (!this.closed) {
			const forceFreshDelivery = this._repairSweepForceFreshPending;
			const addedPeers = new Set(this._repairSweepAddedPeersPending);
			this._repairSweepForceFreshPending = false;
			this._repairSweepAddedPeersPending.clear();

			if (!forceFreshDelivery && addedPeers.size === 0) {
				return;
			}

			// Loop-invariant: compute once instead of per leader of every entry.
			const selfHash = this.node.identity.publicKey.hashcode();

			const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
			const flushTarget = (target: string) => {
				const entries = pendingByTarget.get(target);
				if (!entries || entries.size === 0) {
					return;
				}
				const isJoinWarmupTarget = addedPeers.has(target);
				const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
				this.dispatchMaybeMissingEntries(target, entries, {
					bypassRecentDedupe,
					retryScheduleMs: isJoinWarmupTarget
						? JOIN_WARMUP_RETRY_SCHEDULE_MS
						: undefined,
					forceFreshDelivery,
				});
				pendingByTarget.delete(target);
			};
			const queueEntryForTarget = (
				target: string,
				entry: EntryReplicated<any>,
			) => {
				let set = pendingByTarget.get(target);
				if (!set) {
					set = new Map();
					pendingByTarget.set(target, set);
				}
				if (set.has(entry.hash)) {
					return;
				}
				set.set(entry.hash, entry);
				if (set.size >= this.repairSweepTargetBufferSize) {
					flushTarget(target);
				}
			};

			const iterator = this.entryCoordinatesIndex.iterate({});
			try {
				while (!this.closed && !iterator.done()) {
					const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
					for (const entry of entries) {
						const entryReplicated = entry.value;
						const currentPeers = await this.findLeaders(
							entryReplicated.coordinates,
							entryReplicated,
							{ roleAge: 0 },
						);

						// Targets that already received this entry in the force-fresh
						// pass. The per-target buffer cannot be relied on for this:
						// if queuing the entry triggered a flush, the buffer is gone
						// and the added-peers pass below would queue (and later
						// dispatch) the same entry a second time.
						const alreadyQueued = forceFreshDelivery
							? new Set<string>()
							: undefined;

						if (alreadyQueued) {
							for (const [currentPeer] of currentPeers) {
								if (currentPeer === selfHash) {
									continue;
								}
								alreadyQueued.add(currentPeer);
								queueEntryForTarget(currentPeer, entryReplicated);
							}
						}

						for (const peer of addedPeers) {
							if (currentPeers.has(peer) && !alreadyQueued?.has(peer)) {
								queueEntryForTarget(peer, entryReplicated);
							}
						}
					}
				}
			} finally {
				await iterator.close();
			}

			// Flush whatever is left in the partially filled buffers.
			for (const target of [...pendingByTarget.keys()]) {
				flushTarget(target);
			}
		}
	} catch (error: any) {
		if (!isNotStartedError(error)) {
			logger.error(`Repair sweep failed: ${error?.message ?? error}`);
		}
	} finally {
		this._repairSweepRunning = false;
		// Requests may have arrived after the last snapshot; pick them up.
		if (
			!this.closed &&
			(this._repairSweepForceFreshPending ||
				this._repairSweepAddedPeersPending.size > 0)
		) {
			this._repairSweepRunning = true;
			void this.runRepairSweep();
		}
	}
}
|
|
2544
|
+
|
|
1497
2545
|
private async pruneDebouncedFnAddIfNotKeeping(args: {
|
|
1498
2546
|
key: string;
|
|
1499
2547
|
value: {
|
|
@@ -1501,9 +2549,85 @@ export class SharedLog<
|
|
|
1501
2549
|
leaders: Map<string, any>;
|
|
1502
2550
|
};
|
|
1503
2551
|
}) {
|
|
1504
|
-
if (!this.keep || !(await this.keep(args.value.entry))) {
|
|
1505
|
-
return this.pruneDebouncedFn.add(args);
|
|
2552
|
+
if (!this.keep || !(await this.keep(args.value.entry))) {
|
|
2553
|
+
return this.pruneDebouncedFn.add(args);
|
|
2554
|
+
}
|
|
2555
|
+
}
|
|
2556
|
+
|
|
2557
|
+
/**
 * Drop the checked-prune retry state for `hash`, cancelling its timer if one
 * is scheduled.
 */
private clearCheckedPruneRetry(hash: string) {
	const scheduledTimer = this._checkedPruneRetries.get(hash)?.timer;
	if (scheduledTimer) {
		clearTimeout(scheduledTimer);
	}
	this._checkedPruneRetries.delete(hash);
}
|
|
2564
|
+
|
|
2565
|
+
/**
 * Schedule a delayed, bounded retry of prune-enqueueing for an entry.
 *
 * Does nothing when the log is closed, the entry already has a pending delete,
 * a retry timer is already set, or `CHECKED_PRUNE_RETRY_MAX_ATTEMPTS` has been
 * reached. The delay doubles per attempt starting at one second, plus up to
 * 250 ms of random jitter, capped at `CHECKED_PRUNE_RETRY_MAX_DELAY_MS`.
 *
 * When the timer fires, leaders are recomputed with `roleAge: 0`. If that fails
 * or yields none, the leaders passed in `args` are used instead.
 *
 * @param args.entry entry to retry pruning for
 * @param args.leaders fallback leaders, as a map or a set of peer hashes
 */
private scheduleCheckedPruneRetry(args: {
	entry: EntryReplicated<R> | ShallowOrFullEntry<any>;
	leaders: Map<string, unknown> | Set<string>;
}) {
	const hash = args.entry.hash;
	if (this.closed || this._pendingDeletes.has(hash)) {
		return;
	}

	const retryState = this._checkedPruneRetries.get(hash) ?? { attempts: 0 };

	// A retry is already scheduled, or the budget is spent.
	// Avoid unbounded background retries; a new replication-change event can
	// always re-enqueue pruning with fresh leader info.
	if (
		retryState.timer ||
		retryState.attempts >= CHECKED_PRUNE_RETRY_MAX_ATTEMPTS
	) {
		return;
	}

	const attempt = retryState.attempts + 1;
	const jitterMs = Math.floor(Math.random() * 250);
	const backoffMs = 1_000 * 2 ** (attempt - 1);
	const delayMs = Math.min(CHECKED_PRUNE_RETRY_MAX_DELAY_MS, backoffMs + jitterMs);

	const onRetry = async () => {
		// Mark the timer as fired so a later call can schedule the next attempt.
		const current = this._checkedPruneRetries.get(hash);
		if (current) {
			current.timer = undefined;
		}
		if (this.closed || this._pendingDeletes.has(hash)) {
			return;
		}

		let freshLeaders: Map<string, any> | undefined;
		try {
			const replicas = decodeReplicas(args.entry).getValue(this);
			freshLeaders = await this.findLeadersFromEntry(args.entry, replicas, {
				roleAge: 0,
			});
		} catch {
			// Best-effort only.
		}

		let leadersForRetry: Map<string, any>;
		if (freshLeaders && freshLeaders.size > 0) {
			leadersForRetry = freshLeaders;
		} else if (args.leaders instanceof Map) {
			leadersForRetry = args.leaders as any;
		} else {
			// Only hashes are known; wrap each in a minimal leader record.
			leadersForRetry = new Map<string, any>();
			for (const peerHash of args.leaders) {
				leadersForRetry.set(peerHash, { intersecting: true });
			}
		}

		try {
			await this.pruneDebouncedFnAddIfNotKeeping({
				key: hash,
				// TODO types
				value: { entry: args.entry as any, leaders: leadersForRetry },
			});
		} catch {
			// Best-effort only; pruning will be re-attempted on future changes.
		}
	};

	retryState.attempts = attempt;
	retryState.timer = setTimeout(onRetry, delayMs);
	retryState.timer.unref?.();
	this._checkedPruneRetries.set(hash, retryState);
}
|
|
1508
2632
|
|
|
1509
2633
|
async append(
|
|
@@ -1571,286 +2695,30 @@ export class SharedLog<
|
|
|
1571
2695
|
if (options?.target !== "none") {
|
|
1572
2696
|
const target = options?.target;
|
|
1573
2697
|
const deliveryArg = options?.delivery;
|
|
1574
|
-
const
|
|
1575
|
-
deliveryArg === undefined || deliveryArg === false
|
|
1576
|
-
? undefined
|
|
1577
|
-
: deliveryArg === true
|
|
1578
|
-
? {}
|
|
1579
|
-
: deliveryArg;
|
|
1580
|
-
|
|
1581
|
-
let requireRecipients = false;
|
|
1582
|
-
let settleMin: number | undefined;
|
|
1583
|
-
let guardDelivery:
|
|
1584
|
-
| ((promise: Promise<void>) => Promise<void>)
|
|
1585
|
-
| undefined = undefined;
|
|
1586
|
-
|
|
1587
|
-
let firstDeliveryPromise: Promise<void> | undefined;
|
|
1588
|
-
let deliveryPromises: Promise<void>[] | undefined;
|
|
1589
|
-
let addDeliveryPromise: ((promise: Promise<void>) => void) | undefined;
|
|
1590
|
-
|
|
1591
|
-
const leadersForDelivery =
|
|
1592
|
-
delivery && (target === "replicators" || !target)
|
|
1593
|
-
? new Set(leaders.keys())
|
|
1594
|
-
: undefined;
|
|
1595
|
-
|
|
1596
|
-
if (delivery) {
|
|
1597
|
-
const deliverySettle = delivery.settle ?? true;
|
|
1598
|
-
const deliveryTimeout = delivery.timeout;
|
|
1599
|
-
const deliverySignal = delivery.signal;
|
|
1600
|
-
requireRecipients = delivery.requireRecipients === true;
|
|
1601
|
-
settleMin =
|
|
1602
|
-
typeof deliverySettle === "object" &&
|
|
1603
|
-
Number.isFinite(deliverySettle.min)
|
|
1604
|
-
? Math.max(0, Math.floor(deliverySettle.min))
|
|
1605
|
-
: undefined;
|
|
1606
|
-
|
|
1607
|
-
guardDelivery =
|
|
1608
|
-
deliveryTimeout == null && deliverySignal == null
|
|
1609
|
-
? undefined
|
|
1610
|
-
: (promise: Promise<void>) =>
|
|
1611
|
-
new Promise<void>((resolve, reject) => {
|
|
1612
|
-
let settled = false;
|
|
1613
|
-
let timer: ReturnType<typeof setTimeout> | undefined =
|
|
1614
|
-
undefined;
|
|
1615
|
-
const onAbort = () => {
|
|
1616
|
-
if (settled) {
|
|
1617
|
-
return;
|
|
1618
|
-
}
|
|
1619
|
-
settled = true;
|
|
1620
|
-
promise.catch(() => {});
|
|
1621
|
-
cleanup();
|
|
1622
|
-
reject(new AbortError());
|
|
1623
|
-
};
|
|
1624
|
-
|
|
1625
|
-
const cleanup = () => {
|
|
1626
|
-
if (timer != null) {
|
|
1627
|
-
clearTimeout(timer);
|
|
1628
|
-
timer = undefined;
|
|
1629
|
-
}
|
|
1630
|
-
deliverySignal?.removeEventListener("abort", onAbort);
|
|
1631
|
-
};
|
|
1632
|
-
|
|
1633
|
-
if (deliverySignal) {
|
|
1634
|
-
if (deliverySignal.aborted) {
|
|
1635
|
-
onAbort();
|
|
1636
|
-
return;
|
|
1637
|
-
}
|
|
1638
|
-
deliverySignal.addEventListener("abort", onAbort);
|
|
1639
|
-
}
|
|
1640
|
-
|
|
1641
|
-
if (deliveryTimeout != null) {
|
|
1642
|
-
timer = setTimeout(() => {
|
|
1643
|
-
if (settled) {
|
|
1644
|
-
return;
|
|
1645
|
-
}
|
|
1646
|
-
settled = true;
|
|
1647
|
-
promise.catch(() => {});
|
|
1648
|
-
cleanup();
|
|
1649
|
-
reject(new TimeoutError(`Timeout waiting for delivery`));
|
|
1650
|
-
}, deliveryTimeout);
|
|
1651
|
-
}
|
|
1652
|
-
|
|
1653
|
-
promise
|
|
1654
|
-
.then(() => {
|
|
1655
|
-
if (settled) {
|
|
1656
|
-
return;
|
|
1657
|
-
}
|
|
1658
|
-
settled = true;
|
|
1659
|
-
cleanup();
|
|
1660
|
-
resolve();
|
|
1661
|
-
})
|
|
1662
|
-
.catch((e) => {
|
|
1663
|
-
if (settled) {
|
|
1664
|
-
return;
|
|
1665
|
-
}
|
|
1666
|
-
settled = true;
|
|
1667
|
-
cleanup();
|
|
1668
|
-
reject(e);
|
|
1669
|
-
});
|
|
1670
|
-
});
|
|
2698
|
+
const hasDelivery = !(deliveryArg === undefined || deliveryArg === false);
|
|
1671
2699
|
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
}
|
|
1677
|
-
if (!deliveryPromises) {
|
|
1678
|
-
deliveryPromises = [firstDeliveryPromise, promise];
|
|
1679
|
-
firstDeliveryPromise = undefined;
|
|
1680
|
-
return;
|
|
1681
|
-
}
|
|
1682
|
-
deliveryPromises.push(promise);
|
|
1683
|
-
};
|
|
2700
|
+
if (target === "all" && hasDelivery) {
|
|
2701
|
+
throw new Error(
|
|
2702
|
+
`delivery options are not supported with target="all"; fanout broadcast is fire-and-forward`,
|
|
2703
|
+
);
|
|
1684
2704
|
}
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
if (target === "replicators" || !target) {
|
|
1690
|
-
if (message.heads[0].gidRefrences.length > 0) {
|
|
1691
|
-
for (const ref of message.heads[0].gidRefrences) {
|
|
1692
|
-
const entryFromGid = this.log.entryIndex.getHeads(ref, false);
|
|
1693
|
-
for (const entry of await entryFromGid.all()) {
|
|
1694
|
-
let coordinates = await this.getCoordinates(entry);
|
|
1695
|
-
if (coordinates == null) {
|
|
1696
|
-
coordinates = await this.createCoordinates(
|
|
1697
|
-
entry,
|
|
1698
|
-
minReplicasValue,
|
|
1699
|
-
);
|
|
1700
|
-
// TODO are we every to come here?
|
|
1701
|
-
}
|
|
1702
|
-
|
|
1703
|
-
const result = await this._findLeaders(coordinates);
|
|
1704
|
-
for (const [k, v] of result) {
|
|
1705
|
-
leaders.set(k, v);
|
|
1706
|
-
}
|
|
1707
|
-
}
|
|
1708
|
-
}
|
|
1709
|
-
}
|
|
1710
|
-
|
|
1711
|
-
const set = this.addPeersToGidPeerHistory(
|
|
1712
|
-
result.entry.meta.gid,
|
|
1713
|
-
leaders.keys(),
|
|
1714
|
-
);
|
|
1715
|
-
let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
1716
|
-
if (!hasRemotePeers) {
|
|
1717
|
-
if (requireRecipients) {
|
|
1718
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1719
|
-
}
|
|
1720
|
-
continue;
|
|
1721
|
-
}
|
|
1722
|
-
|
|
1723
|
-
if (!delivery) {
|
|
1724
|
-
this.rpc
|
|
1725
|
-
.send(message, {
|
|
1726
|
-
mode: isLeader
|
|
1727
|
-
? new SilentDelivery({ redundancy: 1, to: set })
|
|
1728
|
-
: new AcknowledgeDelivery({ redundancy: 1, to: set }),
|
|
1729
|
-
})
|
|
1730
|
-
.catch((e) => logger.error(e));
|
|
1731
|
-
continue;
|
|
1732
|
-
}
|
|
1733
|
-
|
|
1734
|
-
let expectedRemoteRecipientsCount = 0;
|
|
1735
|
-
const ackTo: string[] = [];
|
|
1736
|
-
let silentTo: string[] | undefined;
|
|
1737
|
-
const ackLimit =
|
|
1738
|
-
settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
|
|
1739
|
-
|
|
1740
|
-
// Always settle towards the current expected replicators for this entry,
|
|
1741
|
-
// not the entire gid peer history.
|
|
1742
|
-
for (const peer of leadersForDelivery!) {
|
|
1743
|
-
if (peer === selfHash) {
|
|
1744
|
-
continue;
|
|
1745
|
-
}
|
|
1746
|
-
expectedRemoteRecipientsCount++;
|
|
1747
|
-
if (ackTo.length < ackLimit) {
|
|
1748
|
-
ackTo.push(peer);
|
|
1749
|
-
} else {
|
|
1750
|
-
silentTo ||= [];
|
|
1751
|
-
silentTo.push(peer);
|
|
1752
|
-
}
|
|
1753
|
-
}
|
|
1754
|
-
|
|
1755
|
-
// Still deliver to known peers for the gid (best-effort), but don't let them
|
|
1756
|
-
// satisfy the settle requirement.
|
|
1757
|
-
for (const peer of set) {
|
|
1758
|
-
if (peer === selfHash) {
|
|
1759
|
-
continue;
|
|
1760
|
-
}
|
|
1761
|
-
if (leadersForDelivery!.has(peer)) {
|
|
1762
|
-
continue;
|
|
1763
|
-
}
|
|
1764
|
-
silentTo ||= [];
|
|
1765
|
-
silentTo.push(peer);
|
|
1766
|
-
}
|
|
1767
|
-
|
|
1768
|
-
if (requireRecipients && expectedRemoteRecipientsCount === 0) {
|
|
1769
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1770
|
-
}
|
|
1771
|
-
|
|
1772
|
-
if (
|
|
1773
|
-
requireRecipients &&
|
|
1774
|
-
ackTo.length + (silentTo?.length || 0) === 0
|
|
1775
|
-
) {
|
|
1776
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1777
|
-
}
|
|
1778
|
-
|
|
1779
|
-
if (ackTo.length > 0) {
|
|
1780
|
-
const promise = this.rpc.send(message, {
|
|
1781
|
-
mode: new AcknowledgeDelivery({
|
|
1782
|
-
redundancy: 1,
|
|
1783
|
-
to: ackTo,
|
|
1784
|
-
}),
|
|
1785
|
-
});
|
|
1786
|
-
addDeliveryPromise!(
|
|
1787
|
-
guardDelivery ? guardDelivery(promise) : promise,
|
|
1788
|
-
);
|
|
1789
|
-
}
|
|
1790
|
-
|
|
1791
|
-
if (silentTo?.length) {
|
|
1792
|
-
this.rpc
|
|
1793
|
-
.send(message, {
|
|
1794
|
-
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
1795
|
-
})
|
|
1796
|
-
.catch((e) => logger.error(e));
|
|
1797
|
-
}
|
|
1798
|
-
} else {
|
|
1799
|
-
if (!delivery) {
|
|
1800
|
-
this.rpc.send(message).catch((e) => logger.error(e));
|
|
1801
|
-
continue;
|
|
1802
|
-
}
|
|
1803
|
-
|
|
1804
|
-
const subscribers = await this.node.services.pubsub.getSubscribers(
|
|
1805
|
-
this.rpc.topic,
|
|
1806
|
-
);
|
|
1807
|
-
|
|
1808
|
-
const ackTo: PublicSignKey[] = [];
|
|
1809
|
-
let silentTo: PublicSignKey[] | undefined;
|
|
1810
|
-
const ackLimit =
|
|
1811
|
-
settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
|
|
1812
|
-
for (const subscriber of subscribers || []) {
|
|
1813
|
-
if (subscriber.hashcode() === selfHash) {
|
|
1814
|
-
continue;
|
|
1815
|
-
}
|
|
1816
|
-
if (ackTo.length < ackLimit) {
|
|
1817
|
-
ackTo.push(subscriber);
|
|
1818
|
-
} else {
|
|
1819
|
-
silentTo ||= [];
|
|
1820
|
-
silentTo.push(subscriber);
|
|
1821
|
-
}
|
|
1822
|
-
}
|
|
1823
|
-
|
|
1824
|
-
if (
|
|
1825
|
-
requireRecipients &&
|
|
1826
|
-
ackTo.length + (silentTo?.length || 0) === 0
|
|
1827
|
-
) {
|
|
1828
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1829
|
-
}
|
|
1830
|
-
|
|
1831
|
-
if (ackTo.length > 0) {
|
|
1832
|
-
const promise = this.rpc.send(message, {
|
|
1833
|
-
mode: new AcknowledgeDelivery({ redundancy: 1, to: ackTo }),
|
|
1834
|
-
});
|
|
1835
|
-
addDeliveryPromise!(
|
|
1836
|
-
guardDelivery ? guardDelivery(promise) : promise,
|
|
1837
|
-
);
|
|
1838
|
-
}
|
|
1839
|
-
|
|
1840
|
-
if (silentTo?.length) {
|
|
1841
|
-
this.rpc
|
|
1842
|
-
.send(message, {
|
|
1843
|
-
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
1844
|
-
})
|
|
1845
|
-
.catch((e) => logger.error(e));
|
|
1846
|
-
}
|
|
1847
|
-
}
|
|
2705
|
+
if (target === "all" && !this._fanoutChannel) {
|
|
2706
|
+
throw new Error(
|
|
2707
|
+
`No fanout channel configured for shared-log topic ${this.topic}`,
|
|
2708
|
+
);
|
|
1848
2709
|
}
|
|
1849
2710
|
|
|
1850
|
-
if (
|
|
1851
|
-
await
|
|
1852
|
-
} else
|
|
1853
|
-
await
|
|
2711
|
+
if (target === "all") {
|
|
2712
|
+
await this._appendDeliverToAllFanout(result.entry);
|
|
2713
|
+
} else {
|
|
2714
|
+
await this._appendDeliverToReplicators(
|
|
2715
|
+
result.entry,
|
|
2716
|
+
minReplicasValue,
|
|
2717
|
+
leaders,
|
|
2718
|
+
selfHash,
|
|
2719
|
+
isLeader,
|
|
2720
|
+
deliveryArg,
|
|
2721
|
+
);
|
|
1854
2722
|
}
|
|
1855
2723
|
}
|
|
1856
2724
|
|
|
@@ -1891,19 +2759,33 @@ export class SharedLog<
|
|
|
1891
2759
|
this.domain.resolution,
|
|
1892
2760
|
);
|
|
1893
2761
|
this._respondToIHaveTimeout = options?.respondToIHaveTimeout ?? 2e4;
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
2762
|
+
this._pendingDeletes = new Map();
|
|
2763
|
+
this._pendingIHave = new Map();
|
|
2764
|
+
this.latestReplicationInfoMessage = new Map();
|
|
2765
|
+
this._replicationInfoBlockedPeers = new Set();
|
|
2766
|
+
this._replicationInfoRequestByPeer = new Map();
|
|
2767
|
+
this._replicationInfoApplyQueueByPeer = new Map();
|
|
2768
|
+
this._repairRetryTimers = new Set();
|
|
2769
|
+
this._recentRepairDispatch = new Map();
|
|
2770
|
+
this._repairSweepRunning = false;
|
|
2771
|
+
this._repairSweepForceFreshPending = false;
|
|
2772
|
+
this._repairSweepAddedPeersPending = new Set();
|
|
2773
|
+
this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
|
|
2774
|
+
this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
|
|
2775
|
+
|
|
2776
|
+
this.uniqueReplicators = new Set();
|
|
2777
|
+
this._replicatorJoinEmitted = new Set();
|
|
2778
|
+
this._replicatorsReconciled = false;
|
|
1902
2779
|
|
|
1903
2780
|
this.openTime = +new Date();
|
|
1904
2781
|
this.oldestOpenTime = this.openTime;
|
|
1905
2782
|
this.distributionDebounceTime =
|
|
1906
2783
|
options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
|
|
2784
|
+
this.repairSweepTargetBufferSize = toPositiveInteger(
|
|
2785
|
+
options?.sync?.repairSweepTargetBufferSize,
|
|
2786
|
+
REPAIR_SWEEP_TARGET_BUFFER_SIZE,
|
|
2787
|
+
"sync.repairSweepTargetBufferSize",
|
|
2788
|
+
);
|
|
1907
2789
|
|
|
1908
2790
|
this.timeUntilRoleMaturity =
|
|
1909
2791
|
options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
|
|
@@ -1935,6 +2817,13 @@ export class SharedLog<
|
|
|
1935
2817
|
}
|
|
1936
2818
|
|
|
1937
2819
|
this._closeController = new AbortController();
|
|
2820
|
+
this._closeController.signal.addEventListener("abort", () => {
|
|
2821
|
+
for (const [_peer, state] of this._replicationInfoRequestByPeer) {
|
|
2822
|
+
if (state.timer) clearTimeout(state.timer);
|
|
2823
|
+
}
|
|
2824
|
+
this._replicationInfoRequestByPeer.clear();
|
|
2825
|
+
});
|
|
2826
|
+
|
|
1938
2827
|
this._isTrustedReplicator = options?.canReplicate;
|
|
1939
2828
|
this.keep = options?.keep;
|
|
1940
2829
|
this.pendingMaturity = new Map();
|
|
@@ -1942,19 +2831,56 @@ export class SharedLog<
|
|
|
1942
2831
|
const id = sha256Base64Sync(this.log.id);
|
|
1943
2832
|
const storage = await this.node.storage.sublevel(id);
|
|
1944
2833
|
|
|
1945
|
-
const localBlocks = await new AnyBlockStore(
|
|
1946
|
-
|
|
1947
|
-
)
|
|
2834
|
+
const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
|
|
2835
|
+
const fanoutService = (this.node.services as any).fanout as FanoutTree | undefined;
|
|
2836
|
+
const blockProviderNamespace = (cid: string) => `cid:${cid}`;
|
|
1948
2837
|
this.remoteBlocks = new RemoteBlocks({
|
|
1949
2838
|
local: localBlocks,
|
|
1950
|
-
publish: (message, options) =>
|
|
1951
|
-
this.rpc.send(
|
|
1952
|
-
new BlocksMessage(message),
|
|
1953
|
-
(options as WithMode).mode instanceof AnyWhere ? undefined : options,
|
|
1954
|
-
),
|
|
2839
|
+
publish: (message, options) => this.rpc.send(new BlocksMessage(message), options),
|
|
1955
2840
|
waitFor: this.rpc.waitFor.bind(this.rpc),
|
|
1956
2841
|
publicKey: this.node.identity.publicKey,
|
|
1957
2842
|
eagerBlocks: options?.eagerBlocks ?? true,
|
|
2843
|
+
resolveProviders: async (cid, opts) => {
|
|
2844
|
+
// 1) tracker-backed provider directory (best-effort, bounded)
|
|
2845
|
+
try {
|
|
2846
|
+
const providers = await fanoutService?.queryProviders(
|
|
2847
|
+
blockProviderNamespace(cid),
|
|
2848
|
+
{
|
|
2849
|
+
want: 8,
|
|
2850
|
+
timeoutMs: 2_000,
|
|
2851
|
+
queryTimeoutMs: 500,
|
|
2852
|
+
bootstrapMaxPeers: 2,
|
|
2853
|
+
signal: opts?.signal,
|
|
2854
|
+
},
|
|
2855
|
+
);
|
|
2856
|
+
if (providers && providers.length > 0) return providers;
|
|
2857
|
+
} catch {
|
|
2858
|
+
// ignore discovery failures
|
|
2859
|
+
}
|
|
2860
|
+
|
|
2861
|
+
// 2) fallback to currently connected RPC peers
|
|
2862
|
+
const self = this.node.identity.publicKey.hashcode();
|
|
2863
|
+
const out: string[] = [];
|
|
2864
|
+
const peers = (this.rpc as any)?.peers;
|
|
2865
|
+
for (const h of peers?.keys?.() ?? []) {
|
|
2866
|
+
if (h === self) continue;
|
|
2867
|
+
if (out.includes(h)) continue;
|
|
2868
|
+
out.push(h);
|
|
2869
|
+
if (out.length >= 32) break;
|
|
2870
|
+
}
|
|
2871
|
+
return out;
|
|
2872
|
+
},
|
|
2873
|
+
onPut: async (cid) => {
|
|
2874
|
+
// Best-effort directory announce for "get without remote.from" workflows.
|
|
2875
|
+
try {
|
|
2876
|
+
await fanoutService?.announceProvider(blockProviderNamespace(cid), {
|
|
2877
|
+
ttlMs: 120_000,
|
|
2878
|
+
bootstrapMaxPeers: 2,
|
|
2879
|
+
});
|
|
2880
|
+
} catch {
|
|
2881
|
+
// ignore announce failures
|
|
2882
|
+
}
|
|
2883
|
+
},
|
|
1958
2884
|
});
|
|
1959
2885
|
|
|
1960
2886
|
await this.remoteBlocks.start();
|
|
@@ -1981,9 +2907,10 @@ export class SharedLog<
|
|
|
1981
2907
|
],
|
|
1982
2908
|
})) > 0;
|
|
1983
2909
|
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
|
|
2910
|
+
this._gidPeersHistory = new Map();
|
|
2911
|
+
this._requestIPruneSent = new Map();
|
|
2912
|
+
this._requestIPruneResponseReplicatorSet = new Map();
|
|
2913
|
+
this._checkedPruneRetries = new Map();
|
|
1987
2914
|
|
|
1988
2915
|
this.replicationChangeDebounceFn = debounceAggregationChanges<
|
|
1989
2916
|
ReplicationRangeIndexable<R>
|
|
@@ -2068,6 +2995,87 @@ export class SharedLog<
|
|
|
2068
2995
|
|
|
2069
2996
|
await this.log.open(this.remoteBlocks, this.node.identity, {
|
|
2070
2997
|
keychain: this.node.services.keychain,
|
|
2998
|
+
resolveRemotePeers: async (hash, options) => {
|
|
2999
|
+
if (options?.signal?.aborted) return undefined;
|
|
3000
|
+
|
|
3001
|
+
const maxPeers = 8;
|
|
3002
|
+
const self = this.node.identity.publicKey.hashcode();
|
|
3003
|
+
const seed = hashToSeed32(hash);
|
|
3004
|
+
|
|
3005
|
+
// Best hint: peers that have recently confirmed having this entry hash.
|
|
3006
|
+
const hinted = this._requestIPruneResponseReplicatorSet.get(hash);
|
|
3007
|
+
if (hinted && hinted.size > 0) {
|
|
3008
|
+
const peers = [...hinted].filter((p) => p !== self);
|
|
3009
|
+
return peers.length > 0
|
|
3010
|
+
? pickDeterministicSubset(peers, seed, maxPeers)
|
|
3011
|
+
: undefined;
|
|
3012
|
+
}
|
|
3013
|
+
|
|
3014
|
+
// Next: peers we already contacted about this hash (may still have it).
|
|
3015
|
+
const contacted = this._requestIPruneSent.get(hash);
|
|
3016
|
+
if (contacted && contacted.size > 0) {
|
|
3017
|
+
const peers = [...contacted].filter((p) => p !== self);
|
|
3018
|
+
return peers.length > 0
|
|
3019
|
+
? pickDeterministicSubset(peers, seed, maxPeers)
|
|
3020
|
+
: undefined;
|
|
3021
|
+
}
|
|
3022
|
+
|
|
3023
|
+
let candidates: string[] | undefined;
|
|
3024
|
+
|
|
3025
|
+
// Prefer the replicator cache; fall back to subscribers if we have no other signal.
|
|
3026
|
+
const replicatorCandidates = [...this.uniqueReplicators].filter(
|
|
3027
|
+
(p) => p !== self,
|
|
3028
|
+
);
|
|
3029
|
+
if (replicatorCandidates.length > 0) {
|
|
3030
|
+
candidates = replicatorCandidates;
|
|
3031
|
+
} else {
|
|
3032
|
+
try {
|
|
3033
|
+
const subscribers = await this._getTopicSubscribers(this.topic);
|
|
3034
|
+
const subscriberCandidates =
|
|
3035
|
+
subscribers?.map((k) => k.hashcode()).filter((p) => p !== self) ??
|
|
3036
|
+
[];
|
|
3037
|
+
candidates =
|
|
3038
|
+
subscriberCandidates.length > 0 ? subscriberCandidates : undefined;
|
|
3039
|
+
} catch {
|
|
3040
|
+
// Best-effort only.
|
|
3041
|
+
}
|
|
3042
|
+
|
|
3043
|
+
if (!candidates || candidates.length === 0) {
|
|
3044
|
+
// Last resort: peers we are already directly connected to. This avoids
|
|
3045
|
+
// depending on global membership knowledge in early-join scenarios.
|
|
3046
|
+
const peerMap = (this.node.services.pubsub as any)?.peers;
|
|
3047
|
+
if (peerMap?.keys) {
|
|
3048
|
+
candidates = [...peerMap.keys()];
|
|
3049
|
+
}
|
|
3050
|
+
}
|
|
3051
|
+
|
|
3052
|
+
if (!candidates || candidates.length === 0) {
|
|
3053
|
+
// Even if the pubsub stream has no established peer streams yet, we may
|
|
3054
|
+
// still have a libp2p connection to one or more peers (e.g. bootstrap).
|
|
3055
|
+
const connectionManager = (this.node.services.pubsub as any)?.components
|
|
3056
|
+
?.connectionManager;
|
|
3057
|
+
const connections = connectionManager?.getConnections?.() ?? [];
|
|
3058
|
+
const connectionHashes: string[] = [];
|
|
3059
|
+
for (const conn of connections) {
|
|
3060
|
+
const peerId = conn?.remotePeer;
|
|
3061
|
+
if (!peerId) continue;
|
|
3062
|
+
try {
|
|
3063
|
+
connectionHashes.push(getPublicKeyFromPeerId(peerId).hashcode());
|
|
3064
|
+
} catch {
|
|
3065
|
+
// Best-effort only.
|
|
3066
|
+
}
|
|
3067
|
+
}
|
|
3068
|
+
if (connectionHashes.length > 0) {
|
|
3069
|
+
candidates = connectionHashes;
|
|
3070
|
+
}
|
|
3071
|
+
}
|
|
3072
|
+
}
|
|
3073
|
+
|
|
3074
|
+
if (!candidates || candidates.length === 0) return undefined;
|
|
3075
|
+
const peers = candidates.filter((p) => p !== self);
|
|
3076
|
+
if (peers.length === 0) return undefined;
|
|
3077
|
+
return pickDeterministicSubset(peers, seed, maxPeers);
|
|
3078
|
+
},
|
|
2071
3079
|
...this._logProperties,
|
|
2072
3080
|
onChange: async (change) => {
|
|
2073
3081
|
await this.onChange(change);
|
|
@@ -2148,6 +3156,7 @@ export class SharedLog<
|
|
|
2148
3156
|
);
|
|
2149
3157
|
|
|
2150
3158
|
await this.rpc.subscribe();
|
|
3159
|
+
await this._openFanoutChannel(options?.fanout);
|
|
2151
3160
|
|
|
2152
3161
|
// mark all our replication ranges as "new", this would allow other peers to understand that we recently reopened our database and might need some sync and warmup
|
|
2153
3162
|
await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
|
|
@@ -2234,17 +3243,15 @@ export class SharedLog<
|
|
|
2234
3243
|
await this.rebalanceParticipation();
|
|
2235
3244
|
|
|
2236
3245
|
// Take into account existing subscription
|
|
2237
|
-
(await this.
|
|
2238
|
-
(v
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
},
|
|
2247
|
-
);
|
|
3246
|
+
(await this._getTopicSubscribers(this.topic))?.forEach((v) => {
|
|
3247
|
+
if (v.equals(this.node.identity.publicKey)) {
|
|
3248
|
+
return;
|
|
3249
|
+
}
|
|
3250
|
+
if (this.closed) {
|
|
3251
|
+
return;
|
|
3252
|
+
}
|
|
3253
|
+
this.handleSubscriptionChange(v, [this.topic], true);
|
|
3254
|
+
});
|
|
2248
3255
|
}
|
|
2249
3256
|
|
|
2250
3257
|
async reset() {
|
|
@@ -2278,7 +3285,7 @@ export class SharedLog<
|
|
|
2278
3285
|
})
|
|
2279
3286
|
.then(async () => {
|
|
2280
3287
|
// is reachable, announce change events
|
|
2281
|
-
const key = await this.
|
|
3288
|
+
const key = await this._resolvePublicKeyFromHash(
|
|
2282
3289
|
segment.value.hash,
|
|
2283
3290
|
);
|
|
2284
3291
|
if (!key) {
|
|
@@ -2288,22 +3295,26 @@ export class SharedLog<
|
|
|
2288
3295
|
);
|
|
2289
3296
|
}
|
|
2290
3297
|
|
|
2291
|
-
|
|
3298
|
+
const keyHash = key.hashcode();
|
|
3299
|
+
this.uniqueReplicators.add(keyHash);
|
|
2292
3300
|
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
|
|
3301
|
+
if (!this._replicatorJoinEmitted.has(keyHash)) {
|
|
3302
|
+
this._replicatorJoinEmitted.add(keyHash);
|
|
3303
|
+
this.events.dispatchEvent(
|
|
3304
|
+
new CustomEvent<ReplicatorJoinEvent>("replicator:join", {
|
|
3305
|
+
detail: { publicKey: key },
|
|
3306
|
+
}),
|
|
3307
|
+
);
|
|
3308
|
+
this.events.dispatchEvent(
|
|
3309
|
+
new CustomEvent<ReplicationChangeEvent>(
|
|
3310
|
+
"replication:change",
|
|
3311
|
+
{
|
|
3312
|
+
detail: { publicKey: key },
|
|
3313
|
+
},
|
|
3314
|
+
),
|
|
3315
|
+
);
|
|
3316
|
+
}
|
|
3317
|
+
})
|
|
2307
3318
|
.catch(async (e) => {
|
|
2308
3319
|
if (isNotStartedError(e)) {
|
|
2309
3320
|
return; // TODO test this path
|
|
@@ -2435,48 +3446,59 @@ export class SharedLog<
|
|
|
2435
3446
|
numbers: this.indexableDomain.numbers,
|
|
2436
3447
|
});
|
|
2437
3448
|
|
|
2438
|
-
|
|
2439
|
-
|
|
2440
|
-
|
|
2441
|
-
|
|
3449
|
+
// Check abort signal before building result
|
|
3450
|
+
if (options?.signal?.aborted) {
|
|
3451
|
+
return [];
|
|
3452
|
+
}
|
|
2442
3453
|
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
3454
|
+
// add all in flight
|
|
3455
|
+
for (const [key, _] of this.syncronizer.syncInFlight) {
|
|
3456
|
+
set.add(key);
|
|
3457
|
+
}
|
|
2447
3458
|
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
3459
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
3460
|
+
|
|
3461
|
+
if (options?.reachableOnly) {
|
|
3462
|
+
const directPeers: Map<string, unknown> | undefined = (this.node.services
|
|
3463
|
+
.pubsub as any)?.peers;
|
|
3464
|
+
|
|
3465
|
+
// Prefer the live pubsub subscriber set when filtering reachability. In some
|
|
3466
|
+
// flows peers can be reachable/active even before (or without) subscriber
|
|
3467
|
+
// state converging, so also consider direct pubsub peers.
|
|
3468
|
+
const subscribers =
|
|
3469
|
+
(await this._getTopicSubscribers(this.topic)) ?? undefined;
|
|
3470
|
+
const subscriberHashcodes = subscribers
|
|
3471
|
+
? new Set(subscribers.map((key) => key.hashcode()))
|
|
2458
3472
|
: undefined;
|
|
2459
3473
|
|
|
3474
|
+
// If reachability is requested but we have no basis for filtering yet
|
|
3475
|
+
// (subscriber snapshot hasn't converged), return the full cover set.
|
|
3476
|
+
// Otherwise, only keep peers we can currently reach.
|
|
3477
|
+
const canFilter =
|
|
3478
|
+
directPeers != null ||
|
|
3479
|
+
(subscriberHashcodes && subscriberHashcodes.size > 0);
|
|
3480
|
+
if (!canFilter) {
|
|
3481
|
+
return [...set];
|
|
3482
|
+
}
|
|
3483
|
+
|
|
2460
3484
|
const reachable: string[] = [];
|
|
2461
|
-
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2462
3485
|
for (const peer of set) {
|
|
2463
3486
|
if (peer === selfHash) {
|
|
2464
3487
|
reachable.push(peer);
|
|
2465
3488
|
continue;
|
|
2466
3489
|
}
|
|
2467
3490
|
if (
|
|
2468
|
-
subscriberHashcodes
|
|
2469
|
-
|
|
2470
|
-
: this.uniqueReplicators.has(peer)
|
|
3491
|
+
(subscriberHashcodes && subscriberHashcodes.has(peer)) ||
|
|
3492
|
+
(directPeers && directPeers.has(peer))
|
|
2471
3493
|
) {
|
|
2472
3494
|
reachable.push(peer);
|
|
2473
3495
|
}
|
|
2474
3496
|
}
|
|
2475
3497
|
return reachable;
|
|
2476
|
-
|
|
3498
|
+
}
|
|
2477
3499
|
|
|
2478
|
-
|
|
2479
|
-
|
|
3500
|
+
return [...set];
|
|
3501
|
+
} catch (error) {
|
|
2480
3502
|
// Handle race conditions where the index gets closed during the operation
|
|
2481
3503
|
if (isNotStartedError(error as Error)) {
|
|
2482
3504
|
return [];
|
|
@@ -2497,6 +3519,13 @@ export class SharedLog<
|
|
|
2497
3519
|
this.pendingMaturity.clear();
|
|
2498
3520
|
|
|
2499
3521
|
this.distributeQueue?.clear();
|
|
3522
|
+
this._closeFanoutChannel();
|
|
3523
|
+
try {
|
|
3524
|
+
this._providerHandle?.close();
|
|
3525
|
+
} catch {
|
|
3526
|
+
// ignore
|
|
3527
|
+
}
|
|
3528
|
+
this._providerHandle = undefined;
|
|
2500
3529
|
this.coordinateToHash.clear();
|
|
2501
3530
|
this.recentlyRebalanced.clear();
|
|
2502
3531
|
this.uniqueReplicators.clear();
|
|
@@ -2513,33 +3542,100 @@ export class SharedLog<
|
|
|
2513
3542
|
"unsubscribe",
|
|
2514
3543
|
this._onUnsubscriptionFn,
|
|
2515
3544
|
);
|
|
3545
|
+
for (const timer of this._repairRetryTimers) {
|
|
3546
|
+
clearTimeout(timer);
|
|
3547
|
+
}
|
|
3548
|
+
this._repairRetryTimers.clear();
|
|
3549
|
+
this._recentRepairDispatch.clear();
|
|
3550
|
+
this._repairSweepRunning = false;
|
|
3551
|
+
this._repairSweepForceFreshPending = false;
|
|
3552
|
+
this._repairSweepAddedPeersPending.clear();
|
|
2516
3553
|
|
|
2517
3554
|
for (const [_k, v] of this._pendingDeletes) {
|
|
2518
3555
|
v.clear();
|
|
2519
3556
|
v.promise.resolve(); // TODO or reject?
|
|
2520
3557
|
}
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
3558
|
+
for (const [_k, v] of this._pendingIHave) {
|
|
3559
|
+
v.clear();
|
|
3560
|
+
}
|
|
3561
|
+
for (const [_k, v] of this._checkedPruneRetries) {
|
|
3562
|
+
if (v.timer) clearTimeout(v.timer);
|
|
3563
|
+
}
|
|
2524
3564
|
|
|
2525
3565
|
await this.remoteBlocks.stop();
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
3566
|
+
this._pendingDeletes.clear();
|
|
3567
|
+
this._pendingIHave.clear();
|
|
3568
|
+
this._checkedPruneRetries.clear();
|
|
3569
|
+
this.latestReplicationInfoMessage.clear();
|
|
3570
|
+
this._gidPeersHistory.clear();
|
|
3571
|
+
this._requestIPruneSent.clear();
|
|
3572
|
+
this._requestIPruneResponseReplicatorSet.clear();
|
|
3573
|
+
// Cancel any pending debounced timers so they can't fire after we've torn down
|
|
3574
|
+
// indexes/RPC state.
|
|
3575
|
+
this.rebalanceParticipationDebounced?.close();
|
|
3576
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
3577
|
+
this.pruneDebouncedFn?.close?.();
|
|
3578
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
3579
|
+
this.pruneDebouncedFn = undefined as any;
|
|
3580
|
+
this.rebalanceParticipationDebounced = undefined;
|
|
3581
|
+
this._replicationRangeIndex.stop();
|
|
3582
|
+
this._entryCoordinatesIndex.stop();
|
|
2536
3583
|
this._replicationRangeIndex = undefined as any;
|
|
2537
3584
|
this._entryCoordinatesIndex = undefined as any;
|
|
2538
3585
|
|
|
2539
3586
|
this.cpuUsage?.stop?.();
|
|
2540
3587
|
/* this._totalParticipation = 0; */
|
|
2541
3588
|
}
|
|
2542
|
-
|
|
3589
|
+
async close(from?: Program): Promise<boolean> {
|
|
3590
|
+
// Best-effort: announce that we are going offline before tearing down
|
|
3591
|
+
// RPC/subscription state.
|
|
3592
|
+
//
|
|
3593
|
+
// Important: do not delete our local replication ranges here. Keeping them
|
|
3594
|
+
// allows `replicate: { type: "resume" }` to restore the previous role on
|
|
3595
|
+
// restart. Explicit `unreplicate()` still clears local state.
|
|
3596
|
+
try {
|
|
3597
|
+
if (!this.closed) {
|
|
3598
|
+
// Prevent any late debounced timers (rebalance/prune) from publishing
|
|
3599
|
+
// replication info after we announce "segments: []". These races can leave
|
|
3600
|
+
// stale segments on remotes after rapid open/close cycles.
|
|
3601
|
+
this._isReplicating = false;
|
|
3602
|
+
this._isAdaptiveReplicating = false;
|
|
3603
|
+
this.rebalanceParticipationDebounced?.close();
|
|
3604
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
3605
|
+
this.pruneDebouncedFn?.close?.();
|
|
3606
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
3607
|
+
|
|
3608
|
+
// Ensure the "I'm leaving" replication reset is actually published before
|
|
3609
|
+
// the RPC child program closes and unsubscribes from its topic. If we fire
|
|
3610
|
+
// and forget here, the publish can race with `super.close()` and get dropped,
|
|
3611
|
+
// leaving stale replication segments on remotes (flaky join/leave tests).
|
|
3612
|
+
// Also ensure close is bounded even when shard overlays are mid-reconcile.
|
|
3613
|
+
const abort = new AbortController();
|
|
3614
|
+
const abortTimer = setTimeout(() => {
|
|
3615
|
+
try {
|
|
3616
|
+
abort.abort(
|
|
3617
|
+
new TimeoutError(
|
|
3618
|
+
"shared-log close replication reset timed out",
|
|
3619
|
+
),
|
|
3620
|
+
);
|
|
3621
|
+
} catch {
|
|
3622
|
+
abort.abort();
|
|
3623
|
+
}
|
|
3624
|
+
}, 2_000);
|
|
3625
|
+
try {
|
|
3626
|
+
await this.rpc
|
|
3627
|
+
.send(new AllReplicatingSegmentsMessage({ segments: [] }), {
|
|
3628
|
+
priority: 1,
|
|
3629
|
+
signal: abort.signal,
|
|
3630
|
+
})
|
|
3631
|
+
.catch(() => {});
|
|
3632
|
+
} finally {
|
|
3633
|
+
clearTimeout(abortTimer);
|
|
3634
|
+
}
|
|
3635
|
+
}
|
|
3636
|
+
} catch {
|
|
3637
|
+
// ignore: close should be resilient even if we were never fully started
|
|
3638
|
+
}
|
|
2543
3639
|
const superClosed = await super.close(from);
|
|
2544
3640
|
if (!superClosed) {
|
|
2545
3641
|
return superClosed;
|
|
@@ -2549,12 +3645,50 @@ export class SharedLog<
|
|
|
2549
3645
|
return true;
|
|
2550
3646
|
}
|
|
2551
3647
|
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2555
|
-
|
|
2556
|
-
|
|
2557
|
-
|
|
3648
|
+
async drop(from?: Program): Promise<boolean> {
|
|
3649
|
+
// Best-effort: announce that we are going offline before tearing down
|
|
3650
|
+
// RPC/subscription state (same reasoning as in `close()`).
|
|
3651
|
+
try {
|
|
3652
|
+
if (!this.closed) {
|
|
3653
|
+
this._isReplicating = false;
|
|
3654
|
+
this._isAdaptiveReplicating = false;
|
|
3655
|
+
this.rebalanceParticipationDebounced?.close();
|
|
3656
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
3657
|
+
this.pruneDebouncedFn?.close?.();
|
|
3658
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
3659
|
+
|
|
3660
|
+
const abort = new AbortController();
|
|
3661
|
+
const abortTimer = setTimeout(() => {
|
|
3662
|
+
try {
|
|
3663
|
+
abort.abort(
|
|
3664
|
+
new TimeoutError(
|
|
3665
|
+
"shared-log drop replication reset timed out",
|
|
3666
|
+
),
|
|
3667
|
+
);
|
|
3668
|
+
} catch {
|
|
3669
|
+
abort.abort();
|
|
3670
|
+
}
|
|
3671
|
+
}, 2_000);
|
|
3672
|
+
try {
|
|
3673
|
+
await this.rpc
|
|
3674
|
+
.send(new AllReplicatingSegmentsMessage({ segments: [] }), {
|
|
3675
|
+
priority: 1,
|
|
3676
|
+
signal: abort.signal,
|
|
3677
|
+
})
|
|
3678
|
+
.catch(() => {});
|
|
3679
|
+
} finally {
|
|
3680
|
+
clearTimeout(abortTimer);
|
|
3681
|
+
}
|
|
3682
|
+
}
|
|
3683
|
+
} catch {
|
|
3684
|
+
// ignore: drop should be resilient even if we were never fully started
|
|
3685
|
+
}
|
|
3686
|
+
|
|
3687
|
+
const superDropped = await super.drop(from);
|
|
3688
|
+
if (!superDropped) {
|
|
3689
|
+
return superDropped;
|
|
3690
|
+
}
|
|
3691
|
+
await this._entryCoordinatesIndex.drop();
|
|
2558
3692
|
await this._replicationRangeIndex.drop();
|
|
2559
3693
|
await this.log.drop();
|
|
2560
3694
|
await this._close();
|
|
@@ -2609,7 +3743,6 @@ export class SharedLog<
|
|
|
2609
3743
|
if (filteredHeads.length === 0) {
|
|
2610
3744
|
return;
|
|
2611
3745
|
}
|
|
2612
|
-
|
|
2613
3746
|
const groupedByGid = await groupByGid(filteredHeads);
|
|
2614
3747
|
const promises: Promise<void>[] = [];
|
|
2615
3748
|
|
|
@@ -2921,20 +4054,20 @@ export class SharedLog<
|
|
|
2921
4054
|
return;
|
|
2922
4055
|
}
|
|
2923
4056
|
|
|
2924
|
-
|
|
2925
|
-
|
|
2926
|
-
|
|
4057
|
+
const segments = (await this.getMyReplicationSegments()).map((x) =>
|
|
4058
|
+
x.toReplicationRange(),
|
|
4059
|
+
);
|
|
2927
4060
|
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
|
-
|
|
2931
|
-
|
|
2932
|
-
|
|
4061
|
+
this.rpc
|
|
4062
|
+
.send(new AllReplicatingSegmentsMessage({ segments }), {
|
|
4063
|
+
mode: new AcknowledgeDelivery({ to: [context.from], redundancy: 1 }),
|
|
4064
|
+
})
|
|
4065
|
+
.catch((e) => logger.error(e.toString()));
|
|
2933
4066
|
|
|
2934
|
-
|
|
2935
|
-
|
|
2936
|
-
|
|
2937
|
-
|
|
4067
|
+
// for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
|
|
4068
|
+
if (this.v8Behaviour) {
|
|
4069
|
+
const role = this.getRole();
|
|
4070
|
+
if (role instanceof Replicator) {
|
|
2938
4071
|
const fixedSettings = !this._isAdaptiveReplicating;
|
|
2939
4072
|
if (fixedSettings) {
|
|
2940
4073
|
await this.rpc.send(
|
|
@@ -2959,71 +4092,91 @@ export class SharedLog<
|
|
|
2959
4092
|
return;
|
|
2960
4093
|
}
|
|
2961
4094
|
|
|
2962
|
-
|
|
2963
|
-
|
|
2964
|
-
|
|
2965
|
-
|
|
2966
|
-
|
|
2967
|
-
|
|
2968
|
-
|
|
2969
|
-
|
|
2970
|
-
|
|
2971
|
-
|
|
2972
|
-
const prev = this.latestReplicationInfoMessage.get(from.hashcode());
|
|
2973
|
-
if (prev && prev > messageTimestamp) {
|
|
4095
|
+
const replicationInfoMessage = msg as
|
|
4096
|
+
| AllReplicatingSegmentsMessage
|
|
4097
|
+
| AddedReplicationSegmentMessage;
|
|
4098
|
+
|
|
4099
|
+
// Process replication updates even if the sender isn't yet considered "ready" by
|
|
4100
|
+
// `Program.waitFor()`. Dropping these messages can lead to missing replicator info
|
|
4101
|
+
// (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
|
|
4102
|
+
const from = context.from!;
|
|
4103
|
+
const fromHash = from.hashcode();
|
|
4104
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
2974
4105
|
return;
|
|
2975
4106
|
}
|
|
4107
|
+
const messageTimestamp = context.message.header.timestamp;
|
|
4108
|
+
await this.withReplicationInfoApplyQueue(fromHash, async () => {
|
|
4109
|
+
try {
|
|
4110
|
+
// The peer may have unsubscribed after this message was queued.
|
|
4111
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
4112
|
+
return;
|
|
4113
|
+
}
|
|
2976
4114
|
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
|
|
4115
|
+
// Process in-order to avoid races where repeated reset messages arrive
|
|
4116
|
+
// concurrently and trigger spurious "added" diffs / rebalancing.
|
|
4117
|
+
const prev = this.latestReplicationInfoMessage.get(fromHash);
|
|
4118
|
+
if (prev && prev > messageTimestamp) {
|
|
4119
|
+
return;
|
|
4120
|
+
}
|
|
2981
4121
|
|
|
2982
|
-
|
|
2983
|
-
return;
|
|
2984
|
-
}
|
|
4122
|
+
this.latestReplicationInfoMessage.set(fromHash, messageTimestamp);
|
|
2985
4123
|
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
x.toReplicationRangeIndexable(from),
|
|
2990
|
-
),
|
|
2991
|
-
from,
|
|
2992
|
-
{
|
|
2993
|
-
reset,
|
|
2994
|
-
checkDuplicates: true,
|
|
2995
|
-
timestamp: Number(messageTimestamp),
|
|
2996
|
-
},
|
|
2997
|
-
);
|
|
2998
|
-
})().catch((e) => {
|
|
2999
|
-
if (isNotStartedError(e)) {
|
|
3000
|
-
return;
|
|
3001
|
-
}
|
|
3002
|
-
logger.error(
|
|
3003
|
-
`Failed to apply replication settings from '${from.hashcode()}': ${
|
|
3004
|
-
e?.message ?? e
|
|
3005
|
-
}`,
|
|
3006
|
-
);
|
|
3007
|
-
});
|
|
3008
|
-
} else if (msg instanceof StoppedReplicating) {
|
|
3009
|
-
if (context.from.equals(this.node.identity.publicKey)) {
|
|
3010
|
-
return;
|
|
3011
|
-
}
|
|
4124
|
+
if (this.closed) {
|
|
4125
|
+
return;
|
|
4126
|
+
}
|
|
3012
4127
|
|
|
3013
|
-
|
|
3014
|
-
|
|
3015
|
-
|
|
3016
|
-
|
|
4128
|
+
const reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
4129
|
+
await this.addReplicationRange(
|
|
4130
|
+
replicationInfoMessage.segments.map((x) =>
|
|
4131
|
+
x.toReplicationRangeIndexable(from),
|
|
4132
|
+
),
|
|
4133
|
+
from,
|
|
4134
|
+
{
|
|
4135
|
+
reset,
|
|
4136
|
+
checkDuplicates: true,
|
|
4137
|
+
timestamp: Number(messageTimestamp),
|
|
4138
|
+
},
|
|
4139
|
+
);
|
|
3017
4140
|
|
|
3018
|
-
|
|
3019
|
-
|
|
3020
|
-
|
|
3021
|
-
|
|
3022
|
-
|
|
3023
|
-
|
|
3024
|
-
|
|
4141
|
+
// If the peer reports any replication segments, stop re-requesting.
|
|
4142
|
+
// (Empty reports can be transient during startup.)
|
|
4143
|
+
if (replicationInfoMessage.segments.length > 0) {
|
|
4144
|
+
this.cancelReplicationInfoRequests(fromHash);
|
|
4145
|
+
}
|
|
4146
|
+
} catch (e) {
|
|
4147
|
+
if (isNotStartedError(e as Error)) {
|
|
4148
|
+
return;
|
|
4149
|
+
}
|
|
4150
|
+
logger.error(
|
|
4151
|
+
`Failed to apply replication settings from '${fromHash}': ${
|
|
4152
|
+
(e as any)?.message ?? e
|
|
4153
|
+
}`,
|
|
4154
|
+
);
|
|
4155
|
+
}
|
|
3025
4156
|
});
|
|
3026
|
-
|
|
4157
|
+
} else if (msg instanceof StoppedReplicating) {
|
|
4158
|
+
if (context.from.equals(this.node.identity.publicKey)) {
|
|
4159
|
+
return;
|
|
4160
|
+
}
|
|
4161
|
+
const fromHash = context.from.hashcode();
|
|
4162
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
4163
|
+
return;
|
|
4164
|
+
}
|
|
4165
|
+
|
|
4166
|
+
const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(
|
|
4167
|
+
msg.segmentIds,
|
|
4168
|
+
context.from,
|
|
4169
|
+
);
|
|
4170
|
+
|
|
4171
|
+
await this.removeReplicationRanges(rangesToRemove, context.from);
|
|
4172
|
+
const timestamp = BigInt(+new Date());
|
|
4173
|
+
for (const range of rangesToRemove) {
|
|
4174
|
+
this.replicationChangeDebounceFn.add({
|
|
4175
|
+
range,
|
|
4176
|
+
type: "removed",
|
|
4177
|
+
timestamp,
|
|
4178
|
+
});
|
|
4179
|
+
}
|
|
3027
4180
|
} else {
|
|
3028
4181
|
throw new Error("Unexpected message");
|
|
3029
4182
|
}
|
|
@@ -3325,10 +4478,10 @@ export class SharedLog<
|
|
|
3325
4478
|
}
|
|
3326
4479
|
}
|
|
3327
4480
|
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
4481
|
+
async waitForReplicator(
|
|
4482
|
+
key: PublicSignKey,
|
|
4483
|
+
options?: {
|
|
4484
|
+
signal?: AbortSignal;
|
|
3332
4485
|
eager?: boolean;
|
|
3333
4486
|
roleAge?: number;
|
|
3334
4487
|
timeout?: number;
|
|
@@ -3340,9 +4493,9 @@ export class SharedLog<
|
|
|
3340
4493
|
? undefined
|
|
3341
4494
|
: (options?.roleAge ?? (await this.getDefaultMinRoleAge()));
|
|
3342
4495
|
|
|
3343
|
-
|
|
3344
|
-
|
|
3345
|
-
|
|
4496
|
+
let settled = false;
|
|
4497
|
+
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
4498
|
+
let requestTimer: ReturnType<typeof setTimeout> | undefined;
|
|
3346
4499
|
|
|
3347
4500
|
const clear = () => {
|
|
3348
4501
|
this.events.removeEventListener("replicator:mature", check);
|
|
@@ -3358,14 +4511,19 @@ export class SharedLog<
|
|
|
3358
4511
|
}
|
|
3359
4512
|
};
|
|
3360
4513
|
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
|
|
3365
|
-
|
|
3366
|
-
|
|
3367
|
-
|
|
3368
|
-
|
|
4514
|
+
const resolve = async () => {
|
|
4515
|
+
if (settled) {
|
|
4516
|
+
return;
|
|
4517
|
+
}
|
|
4518
|
+
settled = true;
|
|
4519
|
+
clear();
|
|
4520
|
+
// `waitForReplicator()` is typically used as a precondition before join/replicate
|
|
4521
|
+
// flows. A replicator can become mature and enqueue a debounced rebalance
|
|
4522
|
+
// (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
|
|
4523
|
+
// observe a "late" rebalance after the wait resolves.
|
|
4524
|
+
await this.replicationChangeDebounceFn?.flush?.();
|
|
4525
|
+
deferred.resolve();
|
|
4526
|
+
};
|
|
3369
4527
|
|
|
3370
4528
|
const reject = (error: Error) => {
|
|
3371
4529
|
if (settled) {
|
|
@@ -3409,13 +4567,14 @@ export class SharedLog<
|
|
|
3409
4567
|
|
|
3410
4568
|
this.rpc
|
|
3411
4569
|
.send(new RequestReplicationInfoMessage(), {
|
|
3412
|
-
mode: new
|
|
4570
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [key] }),
|
|
3413
4571
|
})
|
|
3414
4572
|
.catch((e) => {
|
|
3415
4573
|
// Best-effort: missing peers / unopened RPC should not fail the wait logic.
|
|
3416
4574
|
if (isNotStartedError(e as Error)) {
|
|
3417
4575
|
return;
|
|
3418
4576
|
}
|
|
4577
|
+
logger.error(e?.toString?.() ?? String(e));
|
|
3419
4578
|
});
|
|
3420
4579
|
|
|
3421
4580
|
if (requestAttempts < maxRequestAttempts) {
|
|
@@ -3423,29 +4582,29 @@ export class SharedLog<
|
|
|
3423
4582
|
}
|
|
3424
4583
|
};
|
|
3425
4584
|
|
|
3426
|
-
|
|
3427
|
-
|
|
3428
|
-
|
|
3429
|
-
|
|
3430
|
-
|
|
3431
|
-
|
|
3432
|
-
|
|
3433
|
-
|
|
3434
|
-
|
|
3435
|
-
return;
|
|
3436
|
-
}
|
|
3437
|
-
if (!options?.eager && resolvedRoleAge != null) {
|
|
3438
|
-
if (!isMatured(rect, +new Date(), resolvedRoleAge)) {
|
|
4585
|
+
const check = async () => {
|
|
4586
|
+
const iterator = this.replicationIndex?.iterate(
|
|
4587
|
+
{ query: new StringMatch({ key: "hash", value: key.hashcode() }) },
|
|
4588
|
+
{ reference: true },
|
|
4589
|
+
);
|
|
4590
|
+
try {
|
|
4591
|
+
const rects = await iterator?.next(1);
|
|
4592
|
+
const rect = rects?.[0]?.value;
|
|
4593
|
+
if (!rect) {
|
|
3439
4594
|
return;
|
|
3440
4595
|
}
|
|
4596
|
+
if (!options?.eager && resolvedRoleAge != null) {
|
|
4597
|
+
if (!isMatured(rect, +new Date(), resolvedRoleAge)) {
|
|
4598
|
+
return;
|
|
4599
|
+
}
|
|
4600
|
+
}
|
|
4601
|
+
await resolve();
|
|
4602
|
+
} catch (error) {
|
|
4603
|
+
reject(error instanceof Error ? error : new Error(String(error)));
|
|
4604
|
+
} finally {
|
|
4605
|
+
await iterator?.close();
|
|
3441
4606
|
}
|
|
3442
|
-
|
|
3443
|
-
} catch (error) {
|
|
3444
|
-
reject(error instanceof Error ? error : new Error(String(error)));
|
|
3445
|
-
} finally {
|
|
3446
|
-
await iterator?.close();
|
|
3447
|
-
}
|
|
3448
|
-
};
|
|
4607
|
+
};
|
|
3449
4608
|
|
|
3450
4609
|
requestReplicationInfo();
|
|
3451
4610
|
check();
|
|
@@ -3462,15 +4621,6 @@ export class SharedLog<
|
|
|
3462
4621
|
coverageThreshold?: number;
|
|
3463
4622
|
waitForNewPeers?: boolean;
|
|
3464
4623
|
}) {
|
|
3465
|
-
// if no remotes, just return
|
|
3466
|
-
const subscribers = await this.node.services.pubsub.getSubscribers(
|
|
3467
|
-
this.rpc.topic,
|
|
3468
|
-
);
|
|
3469
|
-
let waitForNewPeers = options?.waitForNewPeers;
|
|
3470
|
-
if (!waitForNewPeers && (subscribers?.length ?? 0) === 0) {
|
|
3471
|
-
throw new NoPeersError(this.rpc.topic);
|
|
3472
|
-
}
|
|
3473
|
-
|
|
3474
4624
|
let coverageThreshold = options?.coverageThreshold ?? 1;
|
|
3475
4625
|
let deferred = pDefer<void>();
|
|
3476
4626
|
let settled = false;
|
|
@@ -3584,6 +4734,7 @@ export class SharedLog<
|
|
|
3584
4734
|
const timeout = options.timeout ?? this.waitForReplicatorTimeout;
|
|
3585
4735
|
|
|
3586
4736
|
return new Promise((resolve, reject) => {
|
|
4737
|
+
let settled = false;
|
|
3587
4738
|
const removeListeners = () => {
|
|
3588
4739
|
this.events.removeEventListener("replication:change", roleListener);
|
|
3589
4740
|
this.events.removeEventListener("replicator:mature", roleListener); // TODO replication:change event ?
|
|
@@ -3592,15 +4743,26 @@ export class SharedLog<
|
|
|
3592
4743
|
abortListener,
|
|
3593
4744
|
);
|
|
3594
4745
|
};
|
|
3595
|
-
const
|
|
4746
|
+
const settleResolve = (value: Map<string, { intersecting: boolean }> | false) => {
|
|
4747
|
+
if (settled) return;
|
|
4748
|
+
settled = true;
|
|
4749
|
+
removeListeners();
|
|
4750
|
+
clearTimeout(timer);
|
|
4751
|
+
resolve(value);
|
|
4752
|
+
};
|
|
4753
|
+
const settleReject = (error: unknown) => {
|
|
4754
|
+
if (settled) return;
|
|
4755
|
+
settled = true;
|
|
3596
4756
|
removeListeners();
|
|
3597
4757
|
clearTimeout(timer);
|
|
3598
|
-
|
|
4758
|
+
reject(error);
|
|
4759
|
+
};
|
|
4760
|
+
const abortListener = () => {
|
|
4761
|
+
settleResolve(false);
|
|
3599
4762
|
};
|
|
3600
4763
|
|
|
3601
4764
|
const timer = setTimeout(async () => {
|
|
3602
|
-
|
|
3603
|
-
resolve(false);
|
|
4765
|
+
settleResolve(false);
|
|
3604
4766
|
}, timeout);
|
|
3605
4767
|
|
|
3606
4768
|
const check = async () => {
|
|
@@ -3624,19 +4786,22 @@ export class SharedLog<
|
|
|
3624
4786
|
}
|
|
3625
4787
|
options?.onLeader && leaderKeys.forEach(options.onLeader);
|
|
3626
4788
|
|
|
3627
|
-
|
|
3628
|
-
|
|
3629
|
-
|
|
4789
|
+
settleResolve(leaders);
|
|
4790
|
+
};
|
|
4791
|
+
const runCheck = () => {
|
|
4792
|
+
void check().catch((error) => {
|
|
4793
|
+
settleReject(error);
|
|
4794
|
+
});
|
|
3630
4795
|
};
|
|
3631
4796
|
|
|
3632
4797
|
const roleListener = () => {
|
|
3633
|
-
|
|
4798
|
+
runCheck();
|
|
3634
4799
|
};
|
|
3635
4800
|
|
|
3636
4801
|
this.events.addEventListener("replication:change", roleListener); // TODO replication:change event ?
|
|
3637
4802
|
this.events.addEventListener("replicator:mature", roleListener); // TODO replication:change event ?
|
|
3638
4803
|
this._closeController.signal.addEventListener("abort", abortListener);
|
|
3639
|
-
|
|
4804
|
+
runCheck();
|
|
3640
4805
|
});
|
|
3641
4806
|
}
|
|
3642
4807
|
|
|
@@ -3735,9 +4900,7 @@ export class SharedLog<
|
|
|
3735
4900
|
let subscribers = 1;
|
|
3736
4901
|
if (!this.rpc.closed) {
|
|
3737
4902
|
try {
|
|
3738
|
-
subscribers =
|
|
3739
|
-
(await this.node.services.pubsub.getSubscribers(this.rpc.topic))
|
|
3740
|
-
?.length ?? 1;
|
|
4903
|
+
subscribers = (await this._getTopicSubscribers(this.rpc.topic))?.length ?? 1;
|
|
3741
4904
|
} catch {
|
|
3742
4905
|
// Best-effort only; fall back to 1.
|
|
3743
4906
|
}
|
|
@@ -3852,22 +5015,45 @@ export class SharedLog<
|
|
|
3852
5015
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
|
|
3853
5016
|
const selfHash = this.node.identity.publicKey.hashcode();
|
|
3854
5017
|
|
|
3855
|
-
//
|
|
3856
|
-
//
|
|
3857
|
-
//
|
|
5018
|
+
// Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
|
|
5019
|
+
// If it is still warming up (for example, only contains self), supplement with
|
|
5020
|
+
// current subscribers until we have enough candidates for this decision.
|
|
3858
5021
|
let peerFilter: Set<string> | undefined = undefined;
|
|
3859
|
-
|
|
3860
|
-
|
|
3861
|
-
|
|
3862
|
-
|
|
5022
|
+
const selfReplicating = await this.isReplicating();
|
|
5023
|
+
if (this.uniqueReplicators.size > 0) {
|
|
5024
|
+
peerFilter = new Set(this.uniqueReplicators);
|
|
5025
|
+
if (selfReplicating) {
|
|
5026
|
+
peerFilter.add(selfHash);
|
|
5027
|
+
} else {
|
|
5028
|
+
peerFilter.delete(selfHash);
|
|
5029
|
+
}
|
|
5030
|
+
|
|
5031
|
+
try {
|
|
5032
|
+
const subscribers = await this._getTopicSubscribers(this.topic);
|
|
5033
|
+
if (subscribers && subscribers.length > 0) {
|
|
5034
|
+
for (const subscriber of subscribers) {
|
|
5035
|
+
peerFilter.add(subscriber.hashcode());
|
|
5036
|
+
}
|
|
5037
|
+
if (selfReplicating) {
|
|
5038
|
+
peerFilter.add(selfHash);
|
|
5039
|
+
} else {
|
|
5040
|
+
peerFilter.delete(selfHash);
|
|
5041
|
+
}
|
|
5042
|
+
}
|
|
5043
|
+
} catch {
|
|
5044
|
+
// Best-effort only; keep current peerFilter.
|
|
5045
|
+
}
|
|
3863
5046
|
} else {
|
|
3864
5047
|
try {
|
|
3865
5048
|
const subscribers =
|
|
3866
|
-
(await this.
|
|
3867
|
-
undefined;
|
|
5049
|
+
(await this._getTopicSubscribers(this.topic)) ?? undefined;
|
|
3868
5050
|
if (subscribers && subscribers.length > 0) {
|
|
3869
5051
|
peerFilter = new Set(subscribers.map((key) => key.hashcode()));
|
|
3870
|
-
|
|
5052
|
+
if (selfReplicating) {
|
|
5053
|
+
peerFilter.add(selfHash);
|
|
5054
|
+
} else {
|
|
5055
|
+
peerFilter.delete(selfHash);
|
|
5056
|
+
}
|
|
3871
5057
|
}
|
|
3872
5058
|
} catch {
|
|
3873
5059
|
// Best-effort only; if pubsub isn't ready, do a full scan.
|
|
@@ -3913,76 +5099,171 @@ export class SharedLog<
|
|
|
3913
5099
|
);
|
|
3914
5100
|
}
|
|
3915
5101
|
|
|
3916
|
-
|
|
3917
|
-
|
|
3918
|
-
|
|
3919
|
-
|
|
3920
|
-
|
|
3921
|
-
|
|
5102
|
+
/**
 * Runs `fn` after the apply task most recently queued for `peerHash`, so
 * that tasks for the same peer execute one at a time in submission order.
 *
 * @param peerHash - key identifying the per-peer queue.
 * @param fn - task to run once the previously queued task has settled.
 * @returns a promise that settles with the outcome of `fn`.
 */
private withReplicationInfoApplyQueue(
	peerHash: string,
	fn: () => Promise<void>,
): Promise<void> {
	const tail =
		this._replicationInfoApplyQueueByPeer.get(peerHash) ?? Promise.resolve();

	// Swallow a failure of the predecessor: it must not prevent the tasks
	// queued behind it from running.
	const recovered = tail.catch(() => {});
	const task = recovered.then(fn);
	this._replicationInfoApplyQueueByPeer.set(peerHash, task);

	// Drop the map entry once this task settles, but only while it is still
	// the newest task for the peer; a later task owns the entry otherwise.
	const releaseIfNewest = () => {
		if (this._replicationInfoApplyQueueByPeer.get(peerHash) !== task) {
			return;
		}
		this._replicationInfoApplyQueueByPeer.delete(peerHash);
	};
	return task.finally(releaseIfNewest);
}
|
|
5119
|
+
|
|
5120
|
+
/**
 * Stops the replication-info request series tracked for `peerHash`: clears
 * its scheduled timer (if one is set) and removes the tracking entry.
 * Does nothing when no series is tracked for that peer.
 *
 * @param peerHash - key of the peer whose request series should stop.
 */
private cancelReplicationInfoRequests(peerHash: string) {
	const tracked = this._replicationInfoRequestByPeer.get(peerHash);
	if (tracked) {
		tracked.timer && clearTimeout(tracked.timer);
		this._replicationInfoRequestByPeer.delete(peerHash);
	}
}
|
|
5128
|
+
|
|
5129
|
+
/**
 * Starts a bounded series of `RequestReplicationInfoMessage` sends to `peer`.
 *
 * The first request is issued immediately; further ones follow at a fixed
 * interval (never below 50 ms) until the attempt cap (never above 5) is
 * reached or the log is closed/aborted. Only one series per peer is tracked
 * at a time, so calling this again while one is active is a no-op.
 *
 * @param peer - public key of the peer to ask for its replication info.
 */
private scheduleReplicationInfoRequests(peer: PublicSignKey) {
	const peerHash = peer.hashcode();
	const alreadyScheduled = this._replicationInfoRequestByPeer.has(peerHash);
	if (alreadyScheduled) {
		return;
	}

	const tracker: { attempts: number; timer?: ReturnType<typeof setTimeout> } = {
		attempts: 0,
	};
	this._replicationInfoRequestByPeer.set(peerHash, tracker);

	const delayMs = Math.max(50, this.waitForReplicatorRequestIntervalMs);
	const configuredAttempts =
		this.waitForReplicatorRequestMaxAttempts ??
		WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS;
	const attemptLimit = Math.min(5, configuredAttempts);

	const attempt = () => {
		const shuttingDown = this.closed || this._closeController.signal.aborted;
		if (shuttingDown) {
			this.cancelReplicationInfoRequests(peerHash);
			return;
		}

		tracker.attempts += 1;

		const request = this.rpc.send(new RequestReplicationInfoMessage(), {
			mode: new AcknowledgeDelivery({ redundancy: 1, to: [peer] }),
		});
		request.catch((e) => {
			// Best-effort: a missing peer or an RPC that is not started yet must
			// not fail join flows, so those errors are ignored; others are logged.
			if (!isNotStartedError(e as Error)) {
				logger.error(e?.toString?.() ?? String(e));
			}
		});

		const exhausted = tracker.attempts >= attemptLimit;
		if (!exhausted) {
			const handle = setTimeout(attempt, delayMs);
			tracker.timer = handle;
			// Where the timer handle supports it, do not let it keep the process alive.
			handle.unref?.();
			return;
		}
		this.cancelReplicationInfoRequests(peerHash);
	};

	attempt();
}
|
|
3953
5178
|
|
|
3954
|
-
|
|
3955
|
-
|
|
3956
|
-
|
|
3957
|
-
|
|
3958
|
-
|
|
3959
|
-
|
|
3960
|
-
|
|
5179
|
+
async handleSubscriptionChange(
|
|
5180
|
+
publicKey: PublicSignKey,
|
|
5181
|
+
topics: string[],
|
|
5182
|
+
subscribed: boolean,
|
|
5183
|
+
) {
|
|
5184
|
+
if (!topics.includes(this.topic)) {
|
|
5185
|
+
return;
|
|
5186
|
+
}
|
|
5187
|
+
|
|
5188
|
+
const peerHash = publicKey.hashcode();
|
|
5189
|
+
if (subscribed) {
|
|
5190
|
+
this._replicationInfoBlockedPeers.delete(peerHash);
|
|
5191
|
+
} else {
|
|
5192
|
+
this._replicationInfoBlockedPeers.add(peerHash);
|
|
5193
|
+
}
|
|
5194
|
+
|
|
5195
|
+
if (!subscribed) {
|
|
5196
|
+
const wasReplicator = this.uniqueReplicators.has(peerHash);
|
|
5197
|
+
try {
|
|
5198
|
+
// Unsubscribe can race with the peer's final replication reset message.
|
|
5199
|
+
// Proactively evict its ranges so leader selection doesn't keep stale owners.
|
|
5200
|
+
await this.removeReplicator(publicKey, { noEvent: true });
|
|
5201
|
+
} catch (error) {
|
|
5202
|
+
if (!isNotStartedError(error as Error)) {
|
|
5203
|
+
throw error;
|
|
5204
|
+
}
|
|
5205
|
+
}
|
|
5206
|
+
|
|
5207
|
+
// Emit replicator:leave at most once per (join -> leave) transition, even if we
|
|
5208
|
+
// concurrently process unsubscribe + replication reset messages for the same peer.
|
|
5209
|
+
const stoppedTransition = wasReplicator;
|
|
5210
|
+
this._replicatorJoinEmitted.delete(peerHash);
|
|
5211
|
+
|
|
5212
|
+
this.cancelReplicationInfoRequests(peerHash);
|
|
5213
|
+
this.removePeerFromGidPeerHistory(peerHash);
|
|
5214
|
+
|
|
5215
|
+
for (const [k, v] of this._requestIPruneSent) {
|
|
5216
|
+
v.delete(peerHash);
|
|
5217
|
+
if (v.size === 0) {
|
|
5218
|
+
this._requestIPruneSent.delete(k);
|
|
5219
|
+
}
|
|
5220
|
+
}
|
|
5221
|
+
|
|
5222
|
+
for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
|
|
5223
|
+
v.delete(peerHash);
|
|
5224
|
+
if (v.size === 0) {
|
|
5225
|
+
this._requestIPruneResponseReplicatorSet.delete(k);
|
|
5226
|
+
}
|
|
5227
|
+
}
|
|
5228
|
+
|
|
5229
|
+
this.syncronizer.onPeerDisconnected(publicKey);
|
|
5230
|
+
|
|
5231
|
+
stoppedTransition &&
|
|
5232
|
+
this.events.dispatchEvent(
|
|
5233
|
+
new CustomEvent<ReplicatorLeaveEvent>("replicator:leave", {
|
|
5234
|
+
detail: { publicKey },
|
|
3961
5235
|
}),
|
|
3962
|
-
|
|
3963
|
-
|
|
3964
|
-
},
|
|
3965
|
-
)
|
|
3966
|
-
.catch((e) => logger.error(e.toString()));
|
|
5236
|
+
);
|
|
5237
|
+
}
|
|
3967
5238
|
|
|
3968
|
-
|
|
3969
|
-
|
|
5239
|
+
if (subscribed) {
|
|
5240
|
+
const replicationSegments = await this.getMyReplicationSegments();
|
|
5241
|
+
if (replicationSegments.length > 0) {
|
|
3970
5242
|
this.rpc
|
|
3971
|
-
.send(
|
|
3972
|
-
|
|
3973
|
-
|
|
5243
|
+
.send(
|
|
5244
|
+
new AllReplicatingSegmentsMessage({
|
|
5245
|
+
segments: replicationSegments.map((x) => x.toReplicationRange()),
|
|
5246
|
+
}),
|
|
5247
|
+
{
|
|
5248
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
|
|
5249
|
+
},
|
|
5250
|
+
)
|
|
3974
5251
|
.catch((e) => logger.error(e.toString()));
|
|
5252
|
+
|
|
5253
|
+
if (this.v8Behaviour) {
|
|
5254
|
+
// for backwards compatibility
|
|
5255
|
+
this.rpc
|
|
5256
|
+
.send(new ResponseRoleMessage({ role: await this.getRole() }), {
|
|
5257
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
|
|
5258
|
+
})
|
|
5259
|
+
.catch((e) => logger.error(e.toString()));
|
|
5260
|
+
}
|
|
3975
5261
|
}
|
|
3976
|
-
}
|
|
3977
5262
|
|
|
3978
|
-
|
|
3979
|
-
|
|
5263
|
+
// Request the remote peer's replication info. This makes joins resilient to
|
|
5264
|
+
// timing-sensitive delivery/order issues where we may miss their initial
|
|
3980
5265
|
// replication announcement.
|
|
3981
|
-
this.
|
|
3982
|
-
.send(new RequestReplicationInfoMessage(), {
|
|
3983
|
-
mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
|
|
3984
|
-
})
|
|
3985
|
-
.catch((e) => logger.error(e.toString()));
|
|
5266
|
+
this.scheduleReplicationInfoRequests(publicKey);
|
|
3986
5267
|
} else {
|
|
3987
5268
|
await this.removeReplicator(publicKey);
|
|
3988
5269
|
}
|
|
@@ -4025,8 +5306,8 @@ export class SharedLog<
|
|
|
4025
5306
|
leaders: Map<string, unknown> | Set<string>;
|
|
4026
5307
|
}
|
|
4027
5308
|
>,
|
|
4028
|
-
|
|
4029
|
-
|
|
5309
|
+
options?: { timeout?: number; unchecked?: boolean },
|
|
5310
|
+
): Promise<any>[] {
|
|
4030
5311
|
if (options?.unchecked) {
|
|
4031
5312
|
return [...entries.values()].map((x) => {
|
|
4032
5313
|
this._gidPeersHistory.delete(x.entry.meta.gid);
|
|
@@ -4051,30 +5332,57 @@ export class SharedLog<
|
|
|
4051
5332
|
// - An entry is joined, where min replicas is lower than before (for all heads for this particular gid) and therefore we are not replicating anymore for this particular gid
|
|
4052
5333
|
// - Peers join and leave, which means we might not be a replicator anymore
|
|
4053
5334
|
|
|
4054
|
-
|
|
5335
|
+
const promises: Promise<any>[] = [];
|
|
4055
5336
|
|
|
4056
|
-
|
|
4057
|
-
|
|
5337
|
+
let peerToEntries: Map<string, string[]> = new Map();
|
|
5338
|
+
let cleanupTimer: ReturnType<typeof setTimeout>[] = [];
|
|
5339
|
+
const explicitTimeout = options?.timeout != null;
|
|
4058
5340
|
|
|
4059
|
-
|
|
4060
|
-
|
|
4061
|
-
|
|
4062
|
-
|
|
4063
|
-
|
|
4064
|
-
|
|
4065
|
-
|
|
5341
|
+
for (const { entry, leaders } of entries.values()) {
|
|
5342
|
+
for (const leader of leaders.keys()) {
|
|
5343
|
+
let set = peerToEntries.get(leader);
|
|
5344
|
+
if (!set) {
|
|
5345
|
+
set = [];
|
|
5346
|
+
peerToEntries.set(leader, set);
|
|
5347
|
+
}
|
|
4066
5348
|
|
|
4067
|
-
|
|
4068
|
-
|
|
5349
|
+
set.push(entry.hash);
|
|
5350
|
+
}
|
|
4069
5351
|
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4073
|
-
|
|
4074
|
-
|
|
5352
|
+
const pendingPrev = this._pendingDeletes.get(entry.hash);
|
|
5353
|
+
if (pendingPrev) {
|
|
5354
|
+
// If a background prune is already in-flight, an explicit prune request should
|
|
5355
|
+
// still respect the caller's timeout. Otherwise, tests (and user calls) can
|
|
5356
|
+
// block on the longer "checked prune" timeout derived from
|
|
5357
|
+
// `_respondToIHaveTimeout + waitForReplicatorTimeout`, which is intentionally
|
|
5358
|
+
// large for resiliency.
|
|
5359
|
+
if (explicitTimeout) {
|
|
5360
|
+
const timeoutMs = Math.max(0, Math.floor(options?.timeout ?? 0));
|
|
5361
|
+
promises.push(
|
|
5362
|
+
new Promise((resolve, reject) => {
|
|
5363
|
+
// Mirror the checked-prune error prefix so existing callers/tests can
|
|
5364
|
+
// match on the message substring.
|
|
5365
|
+
const timer = setTimeout(() => {
|
|
5366
|
+
reject(
|
|
5367
|
+
new Error(
|
|
5368
|
+
`Timeout for checked pruning after ${timeoutMs}ms (pending=true closed=${this.closed})`,
|
|
5369
|
+
),
|
|
5370
|
+
);
|
|
5371
|
+
}, timeoutMs);
|
|
5372
|
+
timer.unref?.();
|
|
5373
|
+
pendingPrev.promise.promise
|
|
5374
|
+
.then(resolve, reject)
|
|
5375
|
+
.finally(() => clearTimeout(timer));
|
|
5376
|
+
}),
|
|
5377
|
+
);
|
|
5378
|
+
} else {
|
|
5379
|
+
promises.push(pendingPrev.promise.promise);
|
|
5380
|
+
}
|
|
5381
|
+
continue;
|
|
5382
|
+
}
|
|
4075
5383
|
|
|
4076
|
-
|
|
4077
|
-
|
|
5384
|
+
const minReplicas = decodeReplicas(entry);
|
|
5385
|
+
const deferredPromise: DeferredPromise<void> = pDefer();
|
|
4078
5386
|
|
|
4079
5387
|
const clear = () => {
|
|
4080
5388
|
const pending = this._pendingDeletes.get(entry.hash);
|
|
@@ -4084,12 +5392,13 @@ export class SharedLog<
|
|
|
4084
5392
|
clearTimeout(timeout);
|
|
4085
5393
|
};
|
|
4086
5394
|
|
|
4087
|
-
|
|
4088
|
-
|
|
4089
|
-
|
|
4090
|
-
|
|
4091
|
-
|
|
4092
|
-
|
|
5395
|
+
const resolve = () => {
|
|
5396
|
+
clear();
|
|
5397
|
+
this.clearCheckedPruneRetry(entry.hash);
|
|
5398
|
+
cleanupTimer.push(
|
|
5399
|
+
setTimeout(async () => {
|
|
5400
|
+
this._gidPeersHistory.delete(entry.meta.gid);
|
|
5401
|
+
this.removePruneRequestSent(entry.hash);
|
|
4093
5402
|
this._requestIPruneResponseReplicatorSet.delete(entry.hash);
|
|
4094
5403
|
|
|
4095
5404
|
if (
|
|
@@ -4133,12 +5442,19 @@ export class SharedLog<
|
|
|
4133
5442
|
);
|
|
4134
5443
|
};
|
|
4135
5444
|
|
|
4136
|
-
|
|
4137
|
-
|
|
4138
|
-
|
|
4139
|
-
|
|
4140
|
-
|
|
4141
|
-
|
|
5445
|
+
const reject = (e: any) => {
|
|
5446
|
+
clear();
|
|
5447
|
+
const isCheckedPruneTimeout =
|
|
5448
|
+
e instanceof Error &&
|
|
5449
|
+
typeof e.message === "string" &&
|
|
5450
|
+
e.message.startsWith("Timeout for checked pruning");
|
|
5451
|
+
if (explicitTimeout || !isCheckedPruneTimeout) {
|
|
5452
|
+
this.clearCheckedPruneRetry(entry.hash);
|
|
5453
|
+
}
|
|
5454
|
+
this.removePruneRequestSent(entry.hash);
|
|
5455
|
+
this._requestIPruneResponseReplicatorSet.delete(entry.hash);
|
|
5456
|
+
deferredPromise.reject(e);
|
|
5457
|
+
};
|
|
4142
5458
|
|
|
4143
5459
|
let cursor: NumberFromType<R>[] | undefined = undefined;
|
|
4144
5460
|
|
|
@@ -4156,14 +5472,20 @@ export class SharedLog<
|
|
|
4156
5472
|
PRUNE_DEBOUNCE_INTERVAL * 2,
|
|
4157
5473
|
);
|
|
4158
5474
|
|
|
4159
|
-
|
|
4160
|
-
|
|
4161
|
-
|
|
4162
|
-
|
|
4163
|
-
)
|
|
4164
|
-
|
|
4165
|
-
|
|
4166
|
-
|
|
5475
|
+
const timeout = setTimeout(() => {
|
|
5476
|
+
// For internal/background prune flows (no explicit timeout), retry a few times
|
|
5477
|
+
// to avoid "permanently prunable" entries when `_pendingIHave` expires under
|
|
5478
|
+
// heavy load.
|
|
5479
|
+
if (!explicitTimeout) {
|
|
5480
|
+
this.scheduleCheckedPruneRetry({ entry, leaders });
|
|
5481
|
+
}
|
|
5482
|
+
reject(
|
|
5483
|
+
new Error(
|
|
5484
|
+
`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`,
|
|
5485
|
+
),
|
|
5486
|
+
);
|
|
5487
|
+
}, checkedPruneTimeoutMs);
|
|
5488
|
+
timeout.unref?.();
|
|
4167
5489
|
|
|
4168
5490
|
this._pendingDeletes.set(entry.hash, {
|
|
4169
5491
|
promise: deferredPromise,
|
|
@@ -4200,20 +5522,22 @@ export class SharedLog<
|
|
|
4200
5522
|
let existCounter = this._requestIPruneResponseReplicatorSet.get(
|
|
4201
5523
|
entry.hash,
|
|
4202
5524
|
);
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
|
|
4207
|
-
|
|
4208
|
-
|
|
4209
|
-
|
|
4210
|
-
|
|
5525
|
+
if (!existCounter) {
|
|
5526
|
+
existCounter = new Set();
|
|
5527
|
+
this._requestIPruneResponseReplicatorSet.set(
|
|
5528
|
+
entry.hash,
|
|
5529
|
+
existCounter,
|
|
5530
|
+
);
|
|
5531
|
+
}
|
|
5532
|
+
existCounter.add(publicKeyHash);
|
|
5533
|
+
// Seed provider hints so future remote reads can avoid extra round-trips.
|
|
5534
|
+
this.remoteBlocks.hintProviders(entry.hash, [publicKeyHash]);
|
|
4211
5535
|
|
|
4212
|
-
|
|
4213
|
-
|
|
4214
|
-
|
|
4215
|
-
|
|
4216
|
-
|
|
5536
|
+
if (minReplicasValue <= existCounter.size) {
|
|
5537
|
+
resolve();
|
|
5538
|
+
}
|
|
5539
|
+
},
|
|
5540
|
+
});
|
|
4217
5541
|
|
|
4218
5542
|
promises.push(deferredPromise.promise);
|
|
4219
5543
|
}
|
|
@@ -4249,16 +5573,58 @@ export class SharedLog<
|
|
|
4249
5573
|
}
|
|
4250
5574
|
};
|
|
4251
5575
|
|
|
4252
|
-
|
|
4253
|
-
|
|
4254
|
-
|
|
5576
|
+
for (const [k, v] of peerToEntries) {
|
|
5577
|
+
emitMessages(v, k);
|
|
5578
|
+
}
|
|
4255
5579
|
|
|
4256
|
-
|
|
4257
|
-
|
|
4258
|
-
|
|
5580
|
+
// Keep remote `_pendingIHave` alive in the common "leader doesn't have entry yet"
|
|
5581
|
+
// case. This is intentionally disabled when an explicit timeout is provided to
|
|
5582
|
+
// preserve unit tests that assert remote `_pendingIHave` clears promptly.
|
|
5583
|
+
if (!explicitTimeout && peerToEntries.size > 0) {
|
|
5584
|
+
const respondToIHaveTimeout = Number(this._respondToIHaveTimeout ?? 0);
|
|
5585
|
+
const resendIntervalMs = Math.min(
|
|
5586
|
+
CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS,
|
|
5587
|
+
Math.max(
|
|
5588
|
+
CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS,
|
|
5589
|
+
Math.floor(respondToIHaveTimeout / 2) || 1_000,
|
|
5590
|
+
),
|
|
5591
|
+
);
|
|
5592
|
+
let inFlight = false;
|
|
5593
|
+
const timer = setInterval(() => {
|
|
5594
|
+
if (inFlight) return;
|
|
5595
|
+
if (this.closed) return;
|
|
5596
|
+
|
|
5597
|
+
const pendingByPeer: [string, string[]][] = [];
|
|
5598
|
+
for (const [peer, hashes] of peerToEntries) {
|
|
5599
|
+
const pending = hashes.filter((h) => this._pendingDeletes.has(h));
|
|
5600
|
+
if (pending.length > 0) {
|
|
5601
|
+
pendingByPeer.push([peer, pending]);
|
|
5602
|
+
}
|
|
5603
|
+
}
|
|
5604
|
+
if (pendingByPeer.length === 0) {
|
|
5605
|
+
clearInterval(timer);
|
|
5606
|
+
return;
|
|
5607
|
+
}
|
|
5608
|
+
|
|
5609
|
+
inFlight = true;
|
|
5610
|
+
Promise.allSettled(
|
|
5611
|
+
pendingByPeer.map(([peer, hashes]) =>
|
|
5612
|
+
emitMessages(hashes, peer).catch(() => {}),
|
|
5613
|
+
),
|
|
5614
|
+
).finally(() => {
|
|
5615
|
+
inFlight = false;
|
|
5616
|
+
});
|
|
5617
|
+
}, resendIntervalMs);
|
|
5618
|
+
timer.unref?.();
|
|
5619
|
+
cleanupTimer.push(timer as any);
|
|
4259
5620
|
}
|
|
4260
|
-
|
|
4261
|
-
|
|
5621
|
+
|
|
5622
|
+
let cleanup = () => {
|
|
5623
|
+
for (const timer of cleanupTimer) {
|
|
5624
|
+
clearTimeout(timer);
|
|
5625
|
+
}
|
|
5626
|
+
this._closeController.signal.removeEventListener("abort", cleanup);
|
|
5627
|
+
};
|
|
4262
5628
|
|
|
4263
5629
|
Promise.allSettled(promises).finally(cleanup);
|
|
4264
5630
|
this._closeController.signal.addEventListener("abort", cleanup);
|
|
@@ -4336,24 +5702,113 @@ export class SharedLog<
|
|
|
4336
5702
|
|
|
4337
5703
|
await this.log.trim();
|
|
4338
5704
|
|
|
5705
|
+
const batchedChanges = Array.isArray(changeOrChanges[0])
|
|
5706
|
+
? (changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>[])
|
|
5707
|
+
: [changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>];
|
|
5708
|
+
const changes = batchedChanges.flat();
|
|
5709
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
5710
|
+
// On removed ranges (peer leaves / shrink), gid-level history can hide
|
|
5711
|
+
// per-entry gaps. Force a fresh delivery pass for reassigned entries.
|
|
5712
|
+
const forceFreshDelivery = changes.some(
|
|
5713
|
+
(change) => change.type === "removed" && change.range.hash !== selfHash,
|
|
5714
|
+
);
|
|
5715
|
+
const gidPeersHistorySnapshot = new Map<string, Set<string> | undefined>();
|
|
5716
|
+
const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
|
|
5717
|
+
for (const [target, hashes] of this._recentRepairDispatch) {
|
|
5718
|
+
for (const [hash, ts] of hashes) {
|
|
5719
|
+
if (ts <= dedupeCutoff) {
|
|
5720
|
+
hashes.delete(hash);
|
|
5721
|
+
}
|
|
5722
|
+
}
|
|
5723
|
+
if (hashes.size === 0) {
|
|
5724
|
+
this._recentRepairDispatch.delete(target);
|
|
5725
|
+
}
|
|
5726
|
+
}
|
|
5727
|
+
|
|
4339
5728
|
const changed = false;
|
|
5729
|
+
const replacedPeers = new Set<string>();
|
|
5730
|
+
for (const change of changes) {
|
|
5731
|
+
if (change.type === "replaced" && change.range.hash !== selfHash) {
|
|
5732
|
+
replacedPeers.add(change.range.hash);
|
|
5733
|
+
}
|
|
5734
|
+
}
|
|
5735
|
+
const addedPeers = new Set<string>();
|
|
5736
|
+
for (const change of changes) {
|
|
5737
|
+
if (change.type === "added" || change.type === "replaced") {
|
|
5738
|
+
const hash = change.range.hash;
|
|
5739
|
+
if (hash !== selfHash) {
|
|
5740
|
+
// Range updates can reassign entries to an existing peer shortly after it
|
|
5741
|
+
// already received a subset. Avoid suppressing legitimate follow-up repair.
|
|
5742
|
+
this._recentRepairDispatch.delete(hash);
|
|
5743
|
+
}
|
|
5744
|
+
}
|
|
5745
|
+
if (change.type === "added") {
|
|
5746
|
+
const hash = change.range.hash;
|
|
5747
|
+
if (hash !== selfHash && !replacedPeers.has(hash)) {
|
|
5748
|
+
addedPeers.add(hash);
|
|
5749
|
+
}
|
|
5750
|
+
}
|
|
5751
|
+
}
|
|
4340
5752
|
|
|
4341
5753
|
try {
|
|
4342
5754
|
const uncheckedDeliver: Map<
|
|
4343
5755
|
string,
|
|
4344
5756
|
Map<string, EntryReplicated<any>>
|
|
4345
5757
|
> = new Map();
|
|
5758
|
+
const flushUncheckedDeliverTarget = (target: string) => {
|
|
5759
|
+
const entries = uncheckedDeliver.get(target);
|
|
5760
|
+
if (!entries || entries.size === 0) {
|
|
5761
|
+
return;
|
|
5762
|
+
}
|
|
5763
|
+
const isJoinWarmupTarget = addedPeers.has(target);
|
|
5764
|
+
const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
|
|
5765
|
+
this.dispatchMaybeMissingEntries(target, entries, {
|
|
5766
|
+
bypassRecentDedupe,
|
|
5767
|
+
retryScheduleMs: isJoinWarmupTarget
|
|
5768
|
+
? JOIN_WARMUP_RETRY_SCHEDULE_MS
|
|
5769
|
+
: undefined,
|
|
5770
|
+
forceFreshDelivery,
|
|
5771
|
+
});
|
|
5772
|
+
uncheckedDeliver.delete(target);
|
|
5773
|
+
};
|
|
5774
|
+
const queueUncheckedDeliver = (
|
|
5775
|
+
target: string,
|
|
5776
|
+
entry: EntryReplicated<any>,
|
|
5777
|
+
) => {
|
|
5778
|
+
let set = uncheckedDeliver.get(target);
|
|
5779
|
+
if (!set) {
|
|
5780
|
+
set = new Map();
|
|
5781
|
+
uncheckedDeliver.set(target, set);
|
|
5782
|
+
}
|
|
5783
|
+
if (set.has(entry.hash)) {
|
|
5784
|
+
return;
|
|
5785
|
+
}
|
|
5786
|
+
set.set(entry.hash, entry);
|
|
5787
|
+
if (set.size >= this.repairSweepTargetBufferSize) {
|
|
5788
|
+
flushUncheckedDeliverTarget(target);
|
|
5789
|
+
}
|
|
5790
|
+
};
|
|
4346
5791
|
|
|
4347
5792
|
for await (const entryReplicated of toRebalance<R>(
|
|
4348
|
-
|
|
5793
|
+
changes,
|
|
4349
5794
|
this.entryCoordinatesIndex,
|
|
4350
5795
|
this.recentlyRebalanced,
|
|
5796
|
+
{ forceFresh: forceFreshDelivery },
|
|
4351
5797
|
)) {
|
|
4352
5798
|
if (this.closed) {
|
|
4353
5799
|
break;
|
|
4354
5800
|
}
|
|
4355
5801
|
|
|
4356
|
-
let oldPeersSet
|
|
5802
|
+
let oldPeersSet: Set<string> | undefined;
|
|
5803
|
+
if (!forceFreshDelivery) {
|
|
5804
|
+
const gid = entryReplicated.gid;
|
|
5805
|
+
oldPeersSet = gidPeersHistorySnapshot.get(gid);
|
|
5806
|
+
if (!gidPeersHistorySnapshot.has(gid)) {
|
|
5807
|
+
const existing = this._gidPeersHistory.get(gid);
|
|
5808
|
+
oldPeersSet = existing ? new Set(existing) : undefined;
|
|
5809
|
+
gidPeersHistorySnapshot.set(gid, oldPeersSet);
|
|
5810
|
+
}
|
|
5811
|
+
}
|
|
4357
5812
|
let isLeader = false;
|
|
4358
5813
|
|
|
4359
5814
|
let currentPeers = await this.findLeaders(
|
|
@@ -4366,24 +5821,16 @@ export class SharedLog<
|
|
|
4366
5821
|
},
|
|
4367
5822
|
);
|
|
4368
5823
|
|
|
4369
|
-
|
|
4370
|
-
|
|
4371
|
-
|
|
4372
|
-
|
|
4373
|
-
}
|
|
4374
|
-
|
|
4375
|
-
if (!oldPeersSet?.has(currentPeer)) {
|
|
4376
|
-
let set = uncheckedDeliver.get(currentPeer);
|
|
4377
|
-
if (!set) {
|
|
4378
|
-
set = new Map();
|
|
4379
|
-
uncheckedDeliver.set(currentPeer, set);
|
|
5824
|
+
for (const [currentPeer] of currentPeers) {
|
|
5825
|
+
if (currentPeer === this.node.identity.publicKey.hashcode()) {
|
|
5826
|
+
isLeader = true;
|
|
5827
|
+
continue;
|
|
4380
5828
|
}
|
|
4381
5829
|
|
|
4382
|
-
if (!
|
|
4383
|
-
|
|
5830
|
+
if (!oldPeersSet?.has(currentPeer)) {
|
|
5831
|
+
queueUncheckedDeliver(currentPeer, entryReplicated);
|
|
4384
5832
|
}
|
|
4385
5833
|
}
|
|
4386
|
-
}
|
|
4387
5834
|
|
|
4388
5835
|
if (oldPeersSet) {
|
|
4389
5836
|
for (const oldPeer of oldPeersSet) {
|
|
@@ -4414,11 +5861,15 @@ export class SharedLog<
|
|
|
4414
5861
|
this.removePruneRequestSent(entryReplicated.hash);
|
|
4415
5862
|
}
|
|
4416
5863
|
}
|
|
4417
|
-
|
|
4418
|
-
|
|
4419
|
-
|
|
4420
|
-
|
|
4421
|
-
});
|
|
5864
|
+
|
|
5865
|
+
if (forceFreshDelivery || addedPeers.size > 0) {
|
|
5866
|
+
// Schedule a coalesced background sweep for churn/join windows instead of
|
|
5867
|
+
// scanning the whole index synchronously on each replication change.
|
|
5868
|
+
this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
|
|
5869
|
+
}
|
|
5870
|
+
|
|
5871
|
+
for (const target of [...uncheckedDeliver.keys()]) {
|
|
5872
|
+
flushUncheckedDeliverTarget(target);
|
|
4422
5873
|
}
|
|
4423
5874
|
|
|
4424
5875
|
return changed;
|
|
@@ -4438,7 +5889,22 @@ export class SharedLog<
|
|
|
4438
5889
|
evt.detail.topics.map((x) => x),
|
|
4439
5890
|
)} '`,
|
|
4440
5891
|
);
|
|
4441
|
-
|
|
5892
|
+
if (!evt.detail.topics.includes(this.topic)) {
|
|
5893
|
+
return;
|
|
5894
|
+
}
|
|
5895
|
+
|
|
5896
|
+
const fromHash = evt.detail.from.hashcode();
|
|
5897
|
+
this._replicationInfoBlockedPeers.add(fromHash);
|
|
5898
|
+
this._recentRepairDispatch.delete(fromHash);
|
|
5899
|
+
|
|
5900
|
+
// Keep a per-peer timestamp watermark when we observe an unsubscribe. This
|
|
5901
|
+
// prevents late/out-of-order replication-info messages from re-introducing
|
|
5902
|
+
// stale segments for a peer that has already left the topic.
|
|
5903
|
+
const now = BigInt(+new Date());
|
|
5904
|
+
const prev = this.latestReplicationInfoMessage.get(fromHash);
|
|
5905
|
+
if (!prev || prev < now) {
|
|
5906
|
+
this.latestReplicationInfoMessage.set(fromHash, now);
|
|
5907
|
+
}
|
|
4442
5908
|
|
|
4443
5909
|
return this.handleSubscriptionChange(
|
|
4444
5910
|
evt.detail.from,
|
|
@@ -4453,9 +5919,14 @@ export class SharedLog<
|
|
|
4453
5919
|
evt.detail.topics.map((x) => x),
|
|
4454
5920
|
)}'`,
|
|
4455
5921
|
);
|
|
5922
|
+
if (!evt.detail.topics.includes(this.topic)) {
|
|
5923
|
+
return;
|
|
5924
|
+
}
|
|
5925
|
+
|
|
4456
5926
|
this.remoteBlocks.onReachable(evt.detail.from);
|
|
5927
|
+
this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
|
|
4457
5928
|
|
|
4458
|
-
|
|
5929
|
+
await this.handleSubscriptionChange(
|
|
4459
5930
|
evt.detail.from,
|
|
4460
5931
|
evt.detail.topics,
|
|
4461
5932
|
true,
|