@peerbit/shared-log 12.3.5 → 13.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/sync-batch-sweep.d.ts +2 -0
- package/dist/benchmark/sync-batch-sweep.d.ts.map +1 -0
- package/dist/benchmark/sync-batch-sweep.js +305 -0
- package/dist/benchmark/sync-batch-sweep.js.map +1 -0
- package/dist/src/fanout-envelope.d.ts +18 -0
- package/dist/src/fanout-envelope.d.ts.map +1 -0
- package/dist/src/fanout-envelope.js +85 -0
- package/dist/src/fanout-envelope.js.map +1 -0
- package/dist/src/index.d.ts +55 -6
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1595 -339
- package/dist/src/index.js.map +1 -1
- package/dist/src/pid.d.ts.map +1 -1
- package/dist/src/pid.js +21 -5
- package/dist/src/pid.js.map +1 -1
- package/dist/src/ranges.d.ts +3 -1
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +14 -5
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/index.d.ts +45 -1
- package/dist/src/sync/index.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts +13 -2
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +194 -3
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/dist/src/sync/simple.d.ts +24 -3
- package/dist/src/sync/simple.d.ts.map +1 -1
- package/dist/src/sync/simple.js +330 -32
- package/dist/src/sync/simple.js.map +1 -1
- package/package.json +16 -16
- package/src/fanout-envelope.ts +27 -0
- package/src/index.ts +2162 -691
- package/src/pid.ts +22 -4
- package/src/ranges.ts +14 -4
- package/src/sync/index.ts +53 -1
- package/src/sync/rateless-iblt.ts +237 -4
- package/src/sync/simple.ts +427 -41
package/dist/src/index.js
CHANGED
|
@@ -32,19 +32,19 @@ var __runInitializers = (this && this.__runInitializers) || function (thisArg, i
|
|
|
32
32
|
}
|
|
33
33
|
return useValue ? value : void 0;
|
|
34
34
|
};
|
|
35
|
-
import { BorshError, field, variant } from "@dao-xyz/borsh";
|
|
35
|
+
import { BorshError, deserialize, field, serialize, variant } from "@dao-xyz/borsh";
|
|
36
36
|
import { AnyBlockStore, RemoteBlocks } from "@peerbit/blocks";
|
|
37
37
|
import { cidifyString } from "@peerbit/blocks-interface";
|
|
38
38
|
import { Cache } from "@peerbit/cache";
|
|
39
|
-
import { AccessError, PublicSignKey, sha256Base64Sync, sha256Sync, } from "@peerbit/crypto";
|
|
39
|
+
import { AccessError, PublicSignKey, getPublicKeyFromPeerId, sha256Base64Sync, sha256Sync, } from "@peerbit/crypto";
|
|
40
40
|
import { And, ByteMatchQuery, NotStartedError as IndexNotStartedError, Or, Sort, StringMatch, toId, } from "@peerbit/indexer-interface";
|
|
41
41
|
import { Entry, Log, Meta, ShallowEntry, } from "@peerbit/log";
|
|
42
42
|
import { logger as loggerFn } from "@peerbit/logger";
|
|
43
43
|
import { ClosedError, Program } from "@peerbit/program";
|
|
44
|
-
import { waitForSubscribers } from "@peerbit/pubsub";
|
|
44
|
+
import { FanoutChannel, waitForSubscribers, } from "@peerbit/pubsub";
|
|
45
45
|
import { SubscriptionEvent, UnsubcriptionEvent, } from "@peerbit/pubsub-interface";
|
|
46
46
|
import { RPC } from "@peerbit/rpc";
|
|
47
|
-
import { AcknowledgeDelivery, AnyWhere,
|
|
47
|
+
import { AcknowledgeDelivery, AnyWhere, DataMessage, MessageHeader, NotStartedError, SilentDelivery, } from "@peerbit/stream-interface";
|
|
48
48
|
import { AbortError, TimeoutError, debounceAccumulator, debounceFixedInterval, waitFor, } from "@peerbit/time";
|
|
49
49
|
import pDefer, {} from "p-defer";
|
|
50
50
|
import PQueue from "p-queue";
|
|
@@ -54,6 +54,7 @@ import { CPUUsageIntervalLag } from "./cpu.js";
|
|
|
54
54
|
import { debouncedAccumulatorMap, } from "./debounce.js";
|
|
55
55
|
import { NoPeersError } from "./errors.js";
|
|
56
56
|
import { EntryWithRefs, ExchangeHeadsMessage, RequestIPrune, ResponseIPrune, createExchangeHeadsMessages, } from "./exchange-heads.js";
|
|
57
|
+
import { FanoutEnvelope } from "./fanout-envelope.js";
|
|
57
58
|
import { MAX_U32, MAX_U64, bytesToNumber, createNumbers, denormalizer, } from "./integers.js";
|
|
58
59
|
import { TransportMessage } from "./message.js";
|
|
59
60
|
import { PIDReplicationController } from "./pid.js";
|
|
@@ -84,6 +85,34 @@ const getLatestEntry = (entries) => {
|
|
|
84
85
|
}
|
|
85
86
|
return latest;
|
|
86
87
|
};
|
|
88
|
+
/**
 * Derive a deterministic unsigned 32-bit seed from a string.
 *
 * Uses FNV-1a style mixing (offset basis 0x811c9dc5, prime 0x01000193),
 * folding in one UTF-16 code unit per step.
 *
 * @param {string} str - Text to hash.
 * @returns {number} The hash as an unsigned 32-bit integer.
 */
const hashToSeed32 = (str) => {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let acc = FNV_OFFSET_BASIS;
    let index = 0;
    while (index < str.length) {
        // XOR first, then multiply (the "1a" ordering); Math.imul keeps 32-bit wraparound.
        acc = Math.imul(acc ^ str.charCodeAt(index), FNV_PRIME);
        index += 1;
    }
    // Reinterpret the signed int32 as unsigned.
    return acc >>> 0;
};
|
|
97
|
+
/**
 * Pick up to `max` distinct entries from `peers`, using a xorshift32 sequence
 * started from `seed`, so the same inputs always give the same selection.
 *
 * When `peers` already fits within `max`, the input array itself is returned
 * (not a copy).
 *
 * Hardening compared to plain rejection sampling:
 *  - the target size is capped by the number of distinct values in `peers`,
 *    so duplicate entries can no longer cause an endless loop;
 *  - the seed is truncated to int32 before the zero check, so seeds such as
 *    `0.5` or `2 ** 32` (which collapse to the xorshift fixed point 0) fall
 *    back to 1 instead of spinning forever.
 * For integer seeds in the 32-bit range and duplicate-free input, the picks
 * are the same as before.
 *
 * @param {Array} peers - Candidate values.
 * @param {number} seed - Seed for the generator; 0 is replaced by 1.
 * @param {number} max - Maximum number of values to return.
 * @returns {Array} `peers` itself, or a new array of distinct picks.
 */
const pickDeterministicSubset = (peers, seed, max) => {
    if (peers.length <= max)
        return peers;
    // We can never collect more distinct values than the input contains.
    const target = Math.min(max, new Set(peers).size);
    const subset = [];
    const used = new Set();
    // xorshift32 must not start at 0 (it would stay there).
    let x = (seed | 0) || 1;
    while (subset.length < target) {
        // xorshift32
        x ^= x << 13;
        x ^= x >>> 17;
        x ^= x << 5;
        const peer = peers[(x >>> 0) % peers.length];
        if (!used.has(peer)) {
            used.add(peer);
            subset.push(peer);
        }
    }
    return subset;
};
|
|
87
116
|
export { BlocksMessage };
|
|
88
117
|
const isAdaptiveReplicatorOption = (options) => {
|
|
89
118
|
if (typeof options === "number") {
|
|
@@ -176,6 +205,10 @@ export const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;
|
|
|
176
205
|
// Prefer making pruning robust without timing-based heuristics.
|
|
177
206
|
export const WAIT_FOR_PRUNE_DELAY = 0;
|
|
178
207
|
const PRUNE_DEBOUNCE_INTERVAL = 500;
|
|
208
|
+
const CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS = 250;
|
|
209
|
+
const CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS = 5_000;
|
|
210
|
+
const CHECKED_PRUNE_RETRY_MAX_ATTEMPTS = 3;
|
|
211
|
+
const CHECKED_PRUNE_RETRY_MAX_DELAY_MS = 30_000;
|
|
179
212
|
// Do not set this any lower: the PID controller becomes unstable because the system does not respond fast enough to the controller's updates.
|
|
180
213
|
const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
|
|
181
214
|
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
|
|
@@ -183,6 +216,27 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
|
|
|
183
216
|
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
|
|
184
217
|
const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
|
|
185
218
|
const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
|
|
219
|
+
const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
|
|
220
|
+
const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
|
|
221
|
+
const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
|
|
222
|
+
const FORCE_FRESH_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
|
|
223
|
+
const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000];
|
|
224
|
+
/**
 * Normalise an optional numeric option to an integer that is at least 1.
 *
 * @param {number|null|undefined} value - Raw option value.
 * @param {*} fallback - Returned unchanged when `value` is null or undefined.
 * @param {string} label - Option name used in the error message.
 * @returns {*} `fallback`, or `value` floored and clamped to a minimum of 1.
 * @throws {Error} When `value` is given but is not a finite number above 0.
 */
const toPositiveInteger = (value, fallback, label) => {
    if (value === null || value === undefined) {
        return fallback;
    }
    const isUsable = Number.isFinite(value) && value > 0;
    if (!isUsable) {
        throw new Error(`${label} must be a positive number`);
    }
    // Fractions in (0, 1) floor to 0, so clamp back up to 1.
    const whole = Math.floor(value);
    return whole < 1 ? 1 : whole;
};
|
|
233
|
+
const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS = {
|
|
234
|
+
msgRate: 30,
|
|
235
|
+
msgSize: 1024,
|
|
236
|
+
uploadLimitBps: 5_000_000,
|
|
237
|
+
maxChildren: 24,
|
|
238
|
+
repair: true,
|
|
239
|
+
};
|
|
186
240
|
const getIdForDynamicRange = (publicKey) => {
|
|
187
241
|
return sha256Sync(concat([publicKey.bytes, new TextEncoder().encode("dynamic")]));
|
|
188
242
|
};
|
|
@@ -226,12 +280,17 @@ let SharedLog = (() => {
|
|
|
226
280
|
coordinateToHash;
|
|
227
281
|
recentlyRebalanced;
|
|
228
282
|
uniqueReplicators;
|
|
283
|
+
_replicatorJoinEmitted;
|
|
229
284
|
_replicatorsReconciled;
|
|
230
285
|
/* private _totalParticipation!: number; */
|
|
231
286
|
// gid -> coordinate -> publicKeyHash list (of owners)
|
|
232
287
|
_gidPeersHistory;
|
|
233
288
|
_onSubscriptionFn;
|
|
234
289
|
_onUnsubscriptionFn;
|
|
290
|
+
_onFanoutDataFn;
|
|
291
|
+
_onFanoutUnicastFn;
|
|
292
|
+
_fanoutChannel;
|
|
293
|
+
_providerHandle;
|
|
235
294
|
_isTrustedReplicator;
|
|
236
295
|
_logProperties;
|
|
237
296
|
_closeController;
|
|
@@ -241,6 +300,12 @@ let SharedLog = (() => {
|
|
|
241
300
|
// public key hash to range id to range
|
|
242
301
|
pendingMaturity; // map of peerId to timeout
|
|
243
302
|
latestReplicationInfoMessage;
|
|
303
|
+
// Peers that have unsubscribed from this log's topic. We ignore replication-info
|
|
304
|
+
// messages from them until we see a new subscription, to avoid re-introducing
|
|
305
|
+
// stale membership state during close/unsubscribe races.
|
|
306
|
+
_replicationInfoBlockedPeers;
|
|
307
|
+
_replicationInfoRequestByPeer;
|
|
308
|
+
_replicationInfoApplyQueueByPeer;
|
|
244
309
|
remoteBlocks;
|
|
245
310
|
openTime;
|
|
246
311
|
oldestOpenTime;
|
|
@@ -252,7 +317,13 @@ let SharedLog = (() => {
|
|
|
252
317
|
responseToPruneDebouncedFn;
|
|
253
318
|
_requestIPruneSent; // tracks entry hash to peer hash for requesting I prune messages
|
|
254
319
|
_requestIPruneResponseReplicatorSet; // tracks entry hash to peer hash
|
|
320
|
+
_checkedPruneRetries;
|
|
255
321
|
replicationChangeDebounceFn;
|
|
322
|
+
_repairRetryTimers;
|
|
323
|
+
_recentRepairDispatch;
|
|
324
|
+
_repairSweepRunning;
|
|
325
|
+
_repairSweepForceFreshPending;
|
|
326
|
+
_repairSweepAddedPeersPending;
|
|
256
327
|
// regular distribution checks
|
|
257
328
|
distributeQueue;
|
|
258
329
|
syncronizer;
|
|
@@ -264,6 +335,7 @@ let SharedLog = (() => {
|
|
|
264
335
|
waitForReplicatorRequestMaxAttempts;
|
|
265
336
|
waitForPruneDelay;
|
|
266
337
|
distributionDebounceTime;
|
|
338
|
+
repairSweepTargetBufferSize;
|
|
267
339
|
replicationController;
|
|
268
340
|
history;
|
|
269
341
|
domain;
|
|
@@ -283,6 +355,561 @@ let SharedLog = (() => {
|
|
|
283
355
|
get v8Behaviour() {
|
|
284
356
|
return (this.compatibility ?? Number.MAX_VALUE) < 9;
|
|
285
357
|
}
|
|
358
|
+
getFanoutChannelOptions(options) {
|
|
359
|
+
return {
|
|
360
|
+
...DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS,
|
|
361
|
+
...(options?.channel ?? {}),
|
|
362
|
+
};
|
|
363
|
+
}
|
|
364
|
+
async _openFanoutChannel(options) {
|
|
365
|
+
this._closeFanoutChannel();
|
|
366
|
+
if (!options) {
|
|
367
|
+
return;
|
|
368
|
+
}
|
|
369
|
+
const fanoutService = this.node.services.fanout;
|
|
370
|
+
if (!fanoutService) {
|
|
371
|
+
throw new Error(`Fanout is configured for shared-log topic ${this.topic}, but no fanout service is available on this client`);
|
|
372
|
+
}
|
|
373
|
+
const resolvedRoot = options.root ??
|
|
374
|
+
(await fanoutService?.topicRootControlPlane?.resolveTopicRoot?.(this.topic));
|
|
375
|
+
if (!resolvedRoot) {
|
|
376
|
+
throw new Error(`Fanout is configured for shared-log topic ${this.topic}, but no fanout root was provided and none could be resolved`);
|
|
377
|
+
}
|
|
378
|
+
const channel = new FanoutChannel(fanoutService, {
|
|
379
|
+
topic: this.topic,
|
|
380
|
+
root: resolvedRoot,
|
|
381
|
+
});
|
|
382
|
+
this._fanoutChannel = channel;
|
|
383
|
+
this._onFanoutDataFn =
|
|
384
|
+
this._onFanoutDataFn ||
|
|
385
|
+
((evt) => {
|
|
386
|
+
const detail = evt?.detail;
|
|
387
|
+
if (!detail) {
|
|
388
|
+
return;
|
|
389
|
+
}
|
|
390
|
+
void this._onFanoutData(detail).catch((error) => logger.error(error));
|
|
391
|
+
});
|
|
392
|
+
channel.addEventListener("data", this._onFanoutDataFn);
|
|
393
|
+
this._onFanoutUnicastFn =
|
|
394
|
+
this._onFanoutUnicastFn ||
|
|
395
|
+
((evt) => {
|
|
396
|
+
const detail = evt?.detail;
|
|
397
|
+
if (!detail) {
|
|
398
|
+
return;
|
|
399
|
+
}
|
|
400
|
+
void this._onFanoutUnicast(detail).catch((error) => logger.error(error));
|
|
401
|
+
});
|
|
402
|
+
channel.addEventListener("unicast", this._onFanoutUnicastFn);
|
|
403
|
+
try {
|
|
404
|
+
const channelOptions = this.getFanoutChannelOptions(options);
|
|
405
|
+
if (resolvedRoot === fanoutService.publicKeyHash) {
|
|
406
|
+
await channel.openAsRoot(channelOptions);
|
|
407
|
+
return;
|
|
408
|
+
}
|
|
409
|
+
await channel.join(channelOptions, options.join);
|
|
410
|
+
}
|
|
411
|
+
catch (error) {
|
|
412
|
+
this._closeFanoutChannel();
|
|
413
|
+
throw error;
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
_closeFanoutChannel() {
|
|
417
|
+
if (this._fanoutChannel) {
|
|
418
|
+
if (this._onFanoutDataFn) {
|
|
419
|
+
this._fanoutChannel.removeEventListener("data", this._onFanoutDataFn);
|
|
420
|
+
}
|
|
421
|
+
if (this._onFanoutUnicastFn) {
|
|
422
|
+
this._fanoutChannel.removeEventListener("unicast", this._onFanoutUnicastFn);
|
|
423
|
+
}
|
|
424
|
+
this._fanoutChannel.close();
|
|
425
|
+
}
|
|
426
|
+
this._fanoutChannel = undefined;
|
|
427
|
+
}
|
|
428
|
+
async _onFanoutData(detail) {
|
|
429
|
+
let envelope;
|
|
430
|
+
try {
|
|
431
|
+
envelope = deserialize(detail.payload, FanoutEnvelope);
|
|
432
|
+
}
|
|
433
|
+
catch (error) {
|
|
434
|
+
if (error instanceof BorshError) {
|
|
435
|
+
return;
|
|
436
|
+
}
|
|
437
|
+
throw error;
|
|
438
|
+
}
|
|
439
|
+
let message;
|
|
440
|
+
try {
|
|
441
|
+
message = deserialize(envelope.payload, TransportMessage);
|
|
442
|
+
}
|
|
443
|
+
catch (error) {
|
|
444
|
+
if (error instanceof BorshError) {
|
|
445
|
+
return;
|
|
446
|
+
}
|
|
447
|
+
throw error;
|
|
448
|
+
}
|
|
449
|
+
if (!(message instanceof ExchangeHeadsMessage)) {
|
|
450
|
+
return;
|
|
451
|
+
}
|
|
452
|
+
const from = (await this._resolvePublicKeyFromHash(envelope.from)) ??
|
|
453
|
+
{ hashcode: () => envelope.from };
|
|
454
|
+
const contextMessage = new DataMessage({
|
|
455
|
+
header: new MessageHeader({
|
|
456
|
+
session: 0,
|
|
457
|
+
mode: new AnyWhere(),
|
|
458
|
+
priority: 0,
|
|
459
|
+
}),
|
|
460
|
+
});
|
|
461
|
+
contextMessage.header.timestamp = envelope.timestamp;
|
|
462
|
+
await this.onMessage(message, {
|
|
463
|
+
from,
|
|
464
|
+
message: contextMessage,
|
|
465
|
+
});
|
|
466
|
+
}
|
|
467
|
+
async _onFanoutUnicast(detail) {
|
|
468
|
+
let message;
|
|
469
|
+
try {
|
|
470
|
+
message = deserialize(detail.payload, TransportMessage);
|
|
471
|
+
}
|
|
472
|
+
catch (error) {
|
|
473
|
+
if (error instanceof BorshError) {
|
|
474
|
+
return;
|
|
475
|
+
}
|
|
476
|
+
throw error;
|
|
477
|
+
}
|
|
478
|
+
const fromHash = detail.origin || detail.from;
|
|
479
|
+
const from = (await this._resolvePublicKeyFromHash(fromHash)) ??
|
|
480
|
+
{ hashcode: () => fromHash };
|
|
481
|
+
const contextMessage = new DataMessage({
|
|
482
|
+
header: new MessageHeader({
|
|
483
|
+
session: 0,
|
|
484
|
+
mode: new AnyWhere(),
|
|
485
|
+
priority: 0,
|
|
486
|
+
}),
|
|
487
|
+
});
|
|
488
|
+
contextMessage.header.timestamp = detail.timestamp;
|
|
489
|
+
await this.onMessage(message, {
|
|
490
|
+
from,
|
|
491
|
+
message: contextMessage,
|
|
492
|
+
});
|
|
493
|
+
}
|
|
494
|
+
async _publishExchangeHeadsViaFanout(message) {
|
|
495
|
+
if (!this._fanoutChannel) {
|
|
496
|
+
throw new Error(`No fanout channel configured for shared-log topic ${this.topic}`);
|
|
497
|
+
}
|
|
498
|
+
const envelope = new FanoutEnvelope({
|
|
499
|
+
from: this.node.identity.publicKey.hashcode(),
|
|
500
|
+
timestamp: BigInt(Date.now()),
|
|
501
|
+
payload: serialize(message),
|
|
502
|
+
});
|
|
503
|
+
await this._fanoutChannel.publish(serialize(envelope));
|
|
504
|
+
}
|
|
505
|
+
_parseDeliveryOptions(deliveryArg) {
|
|
506
|
+
const delivery = deliveryArg === undefined || deliveryArg === false
|
|
507
|
+
? undefined
|
|
508
|
+
: deliveryArg === true
|
|
509
|
+
? { reliability: "ack" }
|
|
510
|
+
: deliveryArg;
|
|
511
|
+
if (!delivery) {
|
|
512
|
+
return {
|
|
513
|
+
delivery: undefined,
|
|
514
|
+
reliability: "best-effort",
|
|
515
|
+
requireRecipients: false,
|
|
516
|
+
minAcks: undefined,
|
|
517
|
+
wrap: undefined,
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
const reliability = delivery.reliability ?? "ack";
|
|
521
|
+
const deliveryTimeout = delivery.timeout;
|
|
522
|
+
const deliverySignal = delivery.signal;
|
|
523
|
+
const requireRecipients = delivery.requireRecipients === true;
|
|
524
|
+
const minAcks = delivery.minAcks != null && Number.isFinite(delivery.minAcks)
|
|
525
|
+
? Math.max(0, Math.floor(delivery.minAcks))
|
|
526
|
+
: undefined;
|
|
527
|
+
const wrap = deliveryTimeout == null && deliverySignal == null
|
|
528
|
+
? undefined
|
|
529
|
+
: (promise) => new Promise((resolve, reject) => {
|
|
530
|
+
let settled = false;
|
|
531
|
+
let timer = undefined;
|
|
532
|
+
const onAbort = () => {
|
|
533
|
+
if (settled) {
|
|
534
|
+
return;
|
|
535
|
+
}
|
|
536
|
+
settled = true;
|
|
537
|
+
promise.catch(() => { });
|
|
538
|
+
cleanup();
|
|
539
|
+
reject(new AbortError());
|
|
540
|
+
};
|
|
541
|
+
const cleanup = () => {
|
|
542
|
+
if (timer != null) {
|
|
543
|
+
clearTimeout(timer);
|
|
544
|
+
timer = undefined;
|
|
545
|
+
}
|
|
546
|
+
deliverySignal?.removeEventListener("abort", onAbort);
|
|
547
|
+
};
|
|
548
|
+
if (deliverySignal) {
|
|
549
|
+
if (deliverySignal.aborted) {
|
|
550
|
+
onAbort();
|
|
551
|
+
return;
|
|
552
|
+
}
|
|
553
|
+
deliverySignal.addEventListener("abort", onAbort);
|
|
554
|
+
}
|
|
555
|
+
if (deliveryTimeout != null) {
|
|
556
|
+
timer = setTimeout(() => {
|
|
557
|
+
if (settled) {
|
|
558
|
+
return;
|
|
559
|
+
}
|
|
560
|
+
settled = true;
|
|
561
|
+
promise.catch(() => { });
|
|
562
|
+
cleanup();
|
|
563
|
+
reject(new TimeoutError(`Timeout waiting for delivery`));
|
|
564
|
+
}, deliveryTimeout);
|
|
565
|
+
}
|
|
566
|
+
promise
|
|
567
|
+
.then(() => {
|
|
568
|
+
if (settled) {
|
|
569
|
+
return;
|
|
570
|
+
}
|
|
571
|
+
settled = true;
|
|
572
|
+
cleanup();
|
|
573
|
+
resolve();
|
|
574
|
+
})
|
|
575
|
+
.catch((error) => {
|
|
576
|
+
if (settled) {
|
|
577
|
+
return;
|
|
578
|
+
}
|
|
579
|
+
settled = true;
|
|
580
|
+
cleanup();
|
|
581
|
+
reject(error);
|
|
582
|
+
});
|
|
583
|
+
});
|
|
584
|
+
return {
|
|
585
|
+
delivery,
|
|
586
|
+
reliability,
|
|
587
|
+
requireRecipients,
|
|
588
|
+
minAcks,
|
|
589
|
+
wrap,
|
|
590
|
+
};
|
|
591
|
+
}
|
|
592
|
+
async _getSortedRouteHints(targetHash) {
|
|
593
|
+
const pubsub = this.node.services.pubsub;
|
|
594
|
+
const maybeHints = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
|
|
595
|
+
const hints = Array.isArray(maybeHints) ? maybeHints : [];
|
|
596
|
+
const now = Date.now();
|
|
597
|
+
return hints
|
|
598
|
+
.filter((hint) => hint.expiresAt == null || hint.expiresAt > now)
|
|
599
|
+
.sort((a, b) => {
|
|
600
|
+
const rankA = a.kind === "directstream-ack" ? 0 : 1;
|
|
601
|
+
const rankB = b.kind === "directstream-ack" ? 0 : 1;
|
|
602
|
+
if (rankA !== rankB) {
|
|
603
|
+
return rankA - rankB;
|
|
604
|
+
}
|
|
605
|
+
const costA = a.kind === "directstream-ack"
|
|
606
|
+
? a.distance
|
|
607
|
+
: Math.max(0, (a.route?.length ?? 1) - 1);
|
|
608
|
+
const costB = b.kind === "directstream-ack"
|
|
609
|
+
? b.distance
|
|
610
|
+
: Math.max(0, (b.route?.length ?? 1) - 1);
|
|
611
|
+
if (costA !== costB) {
|
|
612
|
+
return costA - costB;
|
|
613
|
+
}
|
|
614
|
+
return (b.updatedAt ?? 0) - (a.updatedAt ?? 0);
|
|
615
|
+
});
|
|
616
|
+
}
|
|
617
|
+
async _sendAckWithUnifiedHints(properties) {
|
|
618
|
+
const { peer, message, payload, fanoutUnicastOptions } = properties;
|
|
619
|
+
const hints = await this._getSortedRouteHints(peer);
|
|
620
|
+
const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
|
|
621
|
+
const fanoutHint = hints.find((hint) => hint.kind === "fanout-token");
|
|
622
|
+
if (hasDirectHint) {
|
|
623
|
+
try {
|
|
624
|
+
await this.rpc.send(message, {
|
|
625
|
+
mode: new AcknowledgeDelivery({
|
|
626
|
+
redundancy: 1,
|
|
627
|
+
to: [peer],
|
|
628
|
+
}),
|
|
629
|
+
});
|
|
630
|
+
return;
|
|
631
|
+
}
|
|
632
|
+
catch {
|
|
633
|
+
// Fall back to fanout token/direct fanout unicast below.
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
if (fanoutHint && this._fanoutChannel) {
|
|
637
|
+
try {
|
|
638
|
+
await this._fanoutChannel.unicastAck(fanoutHint.route, payload, fanoutUnicastOptions);
|
|
639
|
+
return;
|
|
640
|
+
}
|
|
641
|
+
catch {
|
|
642
|
+
// Fall back below.
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
if (this._fanoutChannel) {
|
|
646
|
+
try {
|
|
647
|
+
await this._fanoutChannel.unicastToAck(peer, payload, fanoutUnicastOptions);
|
|
648
|
+
return;
|
|
649
|
+
}
|
|
650
|
+
catch {
|
|
651
|
+
// Fall back below.
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
await this.rpc.send(message, {
|
|
655
|
+
mode: new AcknowledgeDelivery({
|
|
656
|
+
redundancy: 1,
|
|
657
|
+
to: [peer],
|
|
658
|
+
}),
|
|
659
|
+
});
|
|
660
|
+
}
|
|
661
|
+
async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
|
|
662
|
+
const { delivery, reliability, requireRecipients, minAcks, wrap } = this._parseDeliveryOptions(deliveryArg);
|
|
663
|
+
const pending = [];
|
|
664
|
+
const track = (promise) => {
|
|
665
|
+
pending.push(wrap ? wrap(promise) : promise);
|
|
666
|
+
};
|
|
667
|
+
const fanoutUnicastOptions = delivery?.timeout != null || delivery?.signal != null
|
|
668
|
+
? { timeoutMs: delivery.timeout, signal: delivery.signal }
|
|
669
|
+
: undefined;
|
|
670
|
+
for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
|
|
671
|
+
await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
|
|
672
|
+
const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
|
|
673
|
+
const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
|
|
674
|
+
let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
675
|
+
const allowSubscriberFallback = this.syncronizer instanceof SimpleSyncronizer ||
|
|
676
|
+
(this.compatibility ?? Number.MAX_VALUE) < 10;
|
|
677
|
+
if (!hasRemotePeers && allowSubscriberFallback) {
|
|
678
|
+
try {
|
|
679
|
+
const subscribers = await this._getTopicSubscribers(this.topic);
|
|
680
|
+
if (subscribers && subscribers.length > 0) {
|
|
681
|
+
for (const subscriber of subscribers) {
|
|
682
|
+
const hash = subscriber.hashcode();
|
|
683
|
+
if (hash === selfHash) {
|
|
684
|
+
continue;
|
|
685
|
+
}
|
|
686
|
+
set.add(hash);
|
|
687
|
+
leadersForDelivery?.add(hash);
|
|
688
|
+
}
|
|
689
|
+
hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
catch {
|
|
693
|
+
// Best-effort only; keep discovered recipients as-is.
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
if (!hasRemotePeers) {
|
|
697
|
+
if (requireRecipients) {
|
|
698
|
+
throw new NoPeersError(this.rpc.topic);
|
|
699
|
+
}
|
|
700
|
+
continue;
|
|
701
|
+
}
|
|
702
|
+
if (!delivery) {
|
|
703
|
+
this.rpc
|
|
704
|
+
.send(message, {
|
|
705
|
+
mode: isLeader
|
|
706
|
+
? new SilentDelivery({ redundancy: 1, to: set })
|
|
707
|
+
: new AcknowledgeDelivery({ redundancy: 1, to: set }),
|
|
708
|
+
})
|
|
709
|
+
.catch((error) => logger.error(error));
|
|
710
|
+
continue;
|
|
711
|
+
}
|
|
712
|
+
const orderedRemoteRecipients = [];
|
|
713
|
+
for (const peer of leadersForDelivery) {
|
|
714
|
+
if (peer === selfHash) {
|
|
715
|
+
continue;
|
|
716
|
+
}
|
|
717
|
+
orderedRemoteRecipients.push(peer);
|
|
718
|
+
}
|
|
719
|
+
for (const peer of set) {
|
|
720
|
+
if (peer === selfHash) {
|
|
721
|
+
continue;
|
|
722
|
+
}
|
|
723
|
+
if (leadersForDelivery.has(peer)) {
|
|
724
|
+
continue;
|
|
725
|
+
}
|
|
726
|
+
orderedRemoteRecipients.push(peer);
|
|
727
|
+
}
|
|
728
|
+
const ackTo = [];
|
|
729
|
+
let silentTo;
|
|
730
|
+
// Default delivery semantics: require enough remote ACKs to reach the requested
|
|
731
|
+
// replication degree (local append counts as 1).
|
|
732
|
+
const defaultMinAcks = Math.max(0, minReplicasValue - 1);
|
|
733
|
+
const ackLimitRaw = reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
|
|
734
|
+
const ackLimit = Math.max(0, Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length));
|
|
735
|
+
for (const peer of orderedRemoteRecipients) {
|
|
736
|
+
if (ackTo.length < ackLimit) {
|
|
737
|
+
ackTo.push(peer);
|
|
738
|
+
}
|
|
739
|
+
else {
|
|
740
|
+
silentTo ||= [];
|
|
741
|
+
silentTo.push(peer);
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
if (requireRecipients && orderedRemoteRecipients.length === 0) {
|
|
745
|
+
throw new NoPeersError(this.rpc.topic);
|
|
746
|
+
}
|
|
747
|
+
if (requireRecipients && ackTo.length + (silentTo?.length || 0) === 0) {
|
|
748
|
+
throw new NoPeersError(this.rpc.topic);
|
|
749
|
+
}
|
|
750
|
+
if (ackTo.length > 0) {
|
|
751
|
+
const payload = serialize(message);
|
|
752
|
+
for (const peer of ackTo) {
|
|
753
|
+
track((async () => {
|
|
754
|
+
await this._sendAckWithUnifiedHints({
|
|
755
|
+
peer,
|
|
756
|
+
message,
|
|
757
|
+
payload,
|
|
758
|
+
fanoutUnicastOptions,
|
|
759
|
+
});
|
|
760
|
+
})());
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
if (silentTo?.length) {
|
|
764
|
+
this.rpc
|
|
765
|
+
.send(message, {
|
|
766
|
+
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
767
|
+
})
|
|
768
|
+
.catch((error) => logger.error(error));
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
if (pending.length > 0) {
|
|
772
|
+
await Promise.all(pending);
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
async _mergeLeadersFromGidReferences(message, minReplicasValue, leaders) {
|
|
776
|
+
const gidReferences = message.heads[0]?.gidRefrences;
|
|
777
|
+
if (!gidReferences || gidReferences.length === 0) {
|
|
778
|
+
return;
|
|
779
|
+
}
|
|
780
|
+
for (const gidReference of gidReferences) {
|
|
781
|
+
const entryFromGid = this.log.entryIndex.getHeads(gidReference, false);
|
|
782
|
+
for (const gidEntry of await entryFromGid.all()) {
|
|
783
|
+
let coordinates = await this.getCoordinates(gidEntry);
|
|
784
|
+
if (coordinates == null) {
|
|
785
|
+
coordinates = await this.createCoordinates(gidEntry, minReplicasValue);
|
|
786
|
+
}
|
|
787
|
+
const found = await this._findLeaders(coordinates);
|
|
788
|
+
for (const [key, value] of found) {
|
|
789
|
+
leaders.set(key, value);
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
async _appendDeliverToAllFanout(entry) {
|
|
795
|
+
for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
|
|
796
|
+
await this._publishExchangeHeadsViaFanout(message);
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
async _resolvePublicKeyFromHash(hash) {
|
|
800
|
+
const fanoutService = this.node.services.fanout;
|
|
801
|
+
return (fanoutService?.getPublicKey?.(hash) ??
|
|
802
|
+
this.node.services.pubsub.getPublicKey(hash));
|
|
803
|
+
}
|
|
804
|
+
async _getTopicSubscribers(topic) {
|
|
805
|
+
const maxPeers = 64;
|
|
806
|
+
// Prefer the bounded peer set we already know from the fanout overlay.
|
|
807
|
+
if (this._fanoutChannel && (topic === this.topic || topic === this.rpc.topic)) {
|
|
808
|
+
const hashes = this._fanoutChannel
|
|
809
|
+
.getPeerHashes({ includeSelf: false })
|
|
810
|
+
.slice(0, maxPeers);
|
|
811
|
+
if (hashes.length === 0)
|
|
812
|
+
return [];
|
|
813
|
+
const keys = await Promise.all(hashes.map((hash) => this._resolvePublicKeyFromHash(hash)));
|
|
814
|
+
const uniqueKeys = [];
|
|
815
|
+
const seen = new Set();
|
|
816
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
817
|
+
for (const key of keys) {
|
|
818
|
+
if (!key)
|
|
819
|
+
continue;
|
|
820
|
+
const hash = key.hashcode();
|
|
821
|
+
if (hash === selfHash)
|
|
822
|
+
continue;
|
|
823
|
+
if (seen.has(hash))
|
|
824
|
+
continue;
|
|
825
|
+
seen.add(hash);
|
|
826
|
+
uniqueKeys.push(key);
|
|
827
|
+
}
|
|
828
|
+
return uniqueKeys;
|
|
829
|
+
}
|
|
830
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
831
|
+
const hashes = [];
|
|
832
|
+
// Best-effort provider discovery (bounded). This requires bootstrap trackers.
|
|
833
|
+
try {
|
|
834
|
+
const fanoutService = this.node.services.fanout;
|
|
835
|
+
if (fanoutService?.queryProviders) {
|
|
836
|
+
const ns = `shared-log|${this.topic}`;
|
|
837
|
+
const seed = hashToSeed32(topic);
|
|
838
|
+
const providers = await fanoutService.queryProviders(ns, {
|
|
839
|
+
want: maxPeers,
|
|
840
|
+
seed,
|
|
841
|
+
});
|
|
842
|
+
for (const h of providers ?? []) {
|
|
843
|
+
if (!h || h === selfHash)
|
|
844
|
+
continue;
|
|
845
|
+
hashes.push(h);
|
|
846
|
+
if (hashes.length >= maxPeers)
|
|
847
|
+
break;
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
catch {
|
|
852
|
+
// Best-effort only.
|
|
853
|
+
}
|
|
854
|
+
// Next, use already-connected peer streams (bounded and cheap).
|
|
855
|
+
const peerMap = this.node.services.pubsub
|
|
856
|
+
?.peers;
|
|
857
|
+
if (peerMap?.keys) {
|
|
858
|
+
for (const h of peerMap.keys()) {
|
|
859
|
+
if (!h || h === selfHash)
|
|
860
|
+
continue;
|
|
861
|
+
hashes.push(h);
|
|
862
|
+
if (hashes.length >= maxPeers)
|
|
863
|
+
break;
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
// Finally, fall back to libp2p connections (e.g. bootstrap peers) without requiring
|
|
867
|
+
// any global topic membership view.
|
|
868
|
+
if (hashes.length < maxPeers) {
|
|
869
|
+
const connectionManager = this.node.services.pubsub?.components
|
|
870
|
+
?.connectionManager;
|
|
871
|
+
const connections = connectionManager?.getConnections?.() ?? [];
|
|
872
|
+
for (const conn of connections) {
|
|
873
|
+
const peerId = conn?.remotePeer;
|
|
874
|
+
if (!peerId)
|
|
875
|
+
continue;
|
|
876
|
+
try {
|
|
877
|
+
const h = getPublicKeyFromPeerId(peerId).hashcode();
|
|
878
|
+
if (!h || h === selfHash)
|
|
879
|
+
continue;
|
|
880
|
+
hashes.push(h);
|
|
881
|
+
if (hashes.length >= maxPeers)
|
|
882
|
+
break;
|
|
883
|
+
}
|
|
884
|
+
catch {
|
|
885
|
+
// Best-effort only.
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
if (hashes.length === 0)
|
|
890
|
+
return [];
|
|
891
|
+
const uniqueHashes = [];
|
|
892
|
+
const seen = new Set();
|
|
893
|
+
for (const h of hashes) {
|
|
894
|
+
if (seen.has(h))
|
|
895
|
+
continue;
|
|
896
|
+
seen.add(h);
|
|
897
|
+
uniqueHashes.push(h);
|
|
898
|
+
if (uniqueHashes.length >= maxPeers)
|
|
899
|
+
break;
|
|
900
|
+
}
|
|
901
|
+
const keys = await Promise.all(uniqueHashes.map((hash) => this._resolvePublicKeyFromHash(hash)));
|
|
902
|
+
const uniqueKeys = [];
|
|
903
|
+
for (const key of keys) {
|
|
904
|
+
if (!key)
|
|
905
|
+
continue;
|
|
906
|
+
const hash = key.hashcode();
|
|
907
|
+
if (hash === selfHash)
|
|
908
|
+
continue;
|
|
909
|
+
uniqueKeys.push(key);
|
|
910
|
+
}
|
|
911
|
+
return uniqueKeys;
|
|
912
|
+
}
|
|
286
913
|
// @deprecated
|
|
287
914
|
async getRole() {
|
|
288
915
|
const segments = await this.getMyReplicationSegments();
|
|
@@ -586,6 +1213,7 @@ let SharedLog = (() => {
|
|
|
586
1213
|
})
|
|
587
1214
|
.all();
|
|
588
1215
|
this.uniqueReplicators.delete(keyHash);
|
|
1216
|
+
this._replicatorJoinEmitted.delete(keyHash);
|
|
589
1217
|
await this.replicationIndex.del({ query: { hash: keyHash } });
|
|
590
1218
|
await this.updateOldestTimestampFromIndex();
|
|
591
1219
|
const isMe = this.node.identity.publicKey.hashcode() === keyHash;
|
|
@@ -620,6 +1248,13 @@ let SharedLog = (() => {
|
|
|
620
1248
|
}
|
|
621
1249
|
this.pendingMaturity.delete(keyHash);
|
|
622
1250
|
}
|
|
1251
|
+
// Keep local sync/prune state consistent even when a peer disappears
|
|
1252
|
+
// through replication-info updates without a topic unsubscribe event.
|
|
1253
|
+
this.removePeerFromGidPeerHistory(keyHash);
|
|
1254
|
+
this._recentRepairDispatch.delete(keyHash);
|
|
1255
|
+
if (!isMe) {
|
|
1256
|
+
this.syncronizer.onPeerDisconnected(keyHash);
|
|
1257
|
+
}
|
|
623
1258
|
if (!isMe) {
|
|
624
1259
|
this.rebalanceParticipationDebounced?.call();
|
|
625
1260
|
}
|
|
@@ -668,6 +1303,7 @@ let SharedLog = (() => {
|
|
|
668
1303
|
const otherSegmentsIterator = this.replicationIndex.iterate({ query: { hash: from.hashcode() } }, { shape: { id: true } });
|
|
669
1304
|
if ((await otherSegmentsIterator.next(1)).length === 0) {
|
|
670
1305
|
this.uniqueReplicators.delete(from.hashcode());
|
|
1306
|
+
this._replicatorJoinEmitted.delete(from.hashcode());
|
|
671
1307
|
}
|
|
672
1308
|
await otherSegmentsIterator.close();
|
|
673
1309
|
await this.updateOldestTimestampFromIndex();
|
|
@@ -687,6 +1323,7 @@ let SharedLog = (() => {
|
|
|
687
1323
|
rebalance = rebalance == null ? true : rebalance;
|
|
688
1324
|
let diffs;
|
|
689
1325
|
let deleted = undefined;
|
|
1326
|
+
let isStoppedReplicating = false;
|
|
690
1327
|
if (reset) {
|
|
691
1328
|
deleted = (await this.replicationIndex
|
|
692
1329
|
.iterate({
|
|
@@ -718,6 +1355,7 @@ let SharedLog = (() => {
|
|
|
718
1355
|
];
|
|
719
1356
|
}
|
|
720
1357
|
isNewReplicator = prevCount === 0 && ranges.length > 0;
|
|
1358
|
+
isStoppedReplicating = prevCount > 0 && ranges.length === 0;
|
|
721
1359
|
}
|
|
722
1360
|
else {
|
|
723
1361
|
let batchSize = 100;
|
|
@@ -791,7 +1429,16 @@ let SharedLog = (() => {
|
|
|
791
1429
|
.flat();
|
|
792
1430
|
diffs = changes;
|
|
793
1431
|
}
|
|
794
|
-
|
|
1432
|
+
const fromHash = from.hashcode();
|
|
1433
|
+
// Track replicator membership transitions synchronously so join/leave events are
|
|
1434
|
+
// idempotent even if we process concurrent reset messages/unsubscribes.
|
|
1435
|
+
const stoppedTransition = ranges.length === 0 ? this.uniqueReplicators.delete(fromHash) : false;
|
|
1436
|
+
if (ranges.length === 0) {
|
|
1437
|
+
this._replicatorJoinEmitted.delete(fromHash);
|
|
1438
|
+
}
|
|
1439
|
+
else {
|
|
1440
|
+
this.uniqueReplicators.add(fromHash);
|
|
1441
|
+
}
|
|
795
1442
|
let now = +new Date();
|
|
796
1443
|
let minRoleAge = await this.getDefaultMinRoleAge();
|
|
797
1444
|
let isAllMature = true;
|
|
@@ -867,15 +1514,23 @@ let SharedLog = (() => {
|
|
|
867
1514
|
detail: { publicKey: from },
|
|
868
1515
|
}));
|
|
869
1516
|
if (isNewReplicator) {
|
|
870
|
-
this.
|
|
871
|
-
|
|
872
|
-
|
|
1517
|
+
if (!this._replicatorJoinEmitted.has(fromHash)) {
|
|
1518
|
+
this._replicatorJoinEmitted.add(fromHash);
|
|
1519
|
+
this.events.dispatchEvent(new CustomEvent("replicator:join", {
|
|
1520
|
+
detail: { publicKey: from },
|
|
1521
|
+
}));
|
|
1522
|
+
}
|
|
873
1523
|
if (isAllMature) {
|
|
874
1524
|
this.events.dispatchEvent(new CustomEvent("replicator:mature", {
|
|
875
1525
|
detail: { publicKey: from },
|
|
876
1526
|
}));
|
|
877
1527
|
}
|
|
878
1528
|
}
|
|
1529
|
+
if (isStoppedReplicating && stoppedTransition) {
|
|
1530
|
+
this.events.dispatchEvent(new CustomEvent("replicator:leave", {
|
|
1531
|
+
detail: { publicKey: from },
|
|
1532
|
+
}));
|
|
1533
|
+
}
|
|
879
1534
|
if (rebalance) {
|
|
880
1535
|
for (const diff of diffs) {
|
|
881
1536
|
this.replicationChangeDebounceFn.add(diff);
|
|
@@ -895,6 +1550,20 @@ let SharedLog = (() => {
|
|
|
895
1550
|
if (change) {
|
|
896
1551
|
let addedOrReplaced = change.filter((x) => x.type !== "removed");
|
|
897
1552
|
if (addedOrReplaced.length > 0) {
|
|
1553
|
+
// Provider discovery keep-alive (best-effort). This enables bounded targeted fetches
|
|
1554
|
+
// without relying on any global subscriber list.
|
|
1555
|
+
try {
|
|
1556
|
+
const fanoutService = this.node.services.fanout;
|
|
1557
|
+
if (fanoutService?.provide && !this._providerHandle) {
|
|
1558
|
+
this._providerHandle = fanoutService.provide(`shared-log|${this.topic}`, {
|
|
1559
|
+
ttlMs: 120_000,
|
|
1560
|
+
announceIntervalMs: 60_000,
|
|
1561
|
+
});
|
|
1562
|
+
}
|
|
1563
|
+
}
|
|
1564
|
+
catch {
|
|
1565
|
+
// Best-effort only.
|
|
1566
|
+
}
|
|
898
1567
|
let message = undefined;
|
|
899
1568
|
if (options.reset) {
|
|
900
1569
|
message = new AllReplicatingSegmentsMessage({
|
|
@@ -949,11 +1618,255 @@ let SharedLog = (() => {
|
|
|
949
1618
|
}
|
|
950
1619
|
return set;
|
|
951
1620
|
}
|
|
1621
|
+
dispatchMaybeMissingEntries(target, entries, options) {
|
|
1622
|
+
if (entries.size === 0) {
|
|
1623
|
+
return;
|
|
1624
|
+
}
|
|
1625
|
+
const now = Date.now();
|
|
1626
|
+
let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
|
|
1627
|
+
if (!recentlyDispatchedByHash) {
|
|
1628
|
+
recentlyDispatchedByHash = new Map();
|
|
1629
|
+
this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
|
|
1630
|
+
}
|
|
1631
|
+
for (const [hash, ts] of recentlyDispatchedByHash) {
|
|
1632
|
+
if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
|
|
1633
|
+
recentlyDispatchedByHash.delete(hash);
|
|
1634
|
+
}
|
|
1635
|
+
}
|
|
1636
|
+
const filteredEntries = options?.bypassRecentDedupe === true
|
|
1637
|
+
? new Map(entries)
|
|
1638
|
+
: new Map();
|
|
1639
|
+
if (options?.bypassRecentDedupe !== true) {
|
|
1640
|
+
for (const [hash, entry] of entries) {
|
|
1641
|
+
const prev = recentlyDispatchedByHash.get(hash);
|
|
1642
|
+
if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
|
|
1643
|
+
continue;
|
|
1644
|
+
}
|
|
1645
|
+
recentlyDispatchedByHash.set(hash, now);
|
|
1646
|
+
filteredEntries.set(hash, entry);
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
else {
|
|
1650
|
+
for (const hash of entries.keys()) {
|
|
1651
|
+
recentlyDispatchedByHash.set(hash, now);
|
|
1652
|
+
}
|
|
1653
|
+
}
|
|
1654
|
+
if (filteredEntries.size === 0) {
|
|
1655
|
+
return;
|
|
1656
|
+
}
|
|
1657
|
+
const run = () => {
|
|
1658
|
+
// For force-fresh churn repair we intentionally bypass rateless IBLT and
|
|
1659
|
+
// use simple hash-based sync. This path is a directed "push these hashes
|
|
1660
|
+
// to that peer" recovery flow; using simple sync here avoids occasional
|
|
1661
|
+
// single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
|
|
1662
|
+
if (options?.forceFreshDelivery &&
|
|
1663
|
+
this.syncronizer instanceof RatelessIBLTSynchronizer) {
|
|
1664
|
+
return Promise.resolve(this.syncronizer.simple.onMaybeMissingEntries({
|
|
1665
|
+
entries: filteredEntries,
|
|
1666
|
+
targets: [target],
|
|
1667
|
+
})).catch((error) => logger.error(error));
|
|
1668
|
+
}
|
|
1669
|
+
return Promise.resolve(this.syncronizer.onMaybeMissingEntries({
|
|
1670
|
+
entries: filteredEntries,
|
|
1671
|
+
targets: [target],
|
|
1672
|
+
})).catch((error) => logger.error(error));
|
|
1673
|
+
};
|
|
1674
|
+
const retrySchedule = options?.retryScheduleMs && options.retryScheduleMs.length > 0
|
|
1675
|
+
? options.retryScheduleMs
|
|
1676
|
+
: options?.forceFreshDelivery
|
|
1677
|
+
? FORCE_FRESH_RETRY_SCHEDULE_MS
|
|
1678
|
+
: [0];
|
|
1679
|
+
for (const delayMs of retrySchedule) {
|
|
1680
|
+
if (delayMs === 0) {
|
|
1681
|
+
void run();
|
|
1682
|
+
continue;
|
|
1683
|
+
}
|
|
1684
|
+
const timer = setTimeout(() => {
|
|
1685
|
+
this._repairRetryTimers.delete(timer);
|
|
1686
|
+
if (this.closed) {
|
|
1687
|
+
return;
|
|
1688
|
+
}
|
|
1689
|
+
void run();
|
|
1690
|
+
}, delayMs);
|
|
1691
|
+
timer.unref?.();
|
|
1692
|
+
this._repairRetryTimers.add(timer);
|
|
1693
|
+
}
|
|
1694
|
+
}
|
|
1695
|
+
scheduleRepairSweep(options) {
|
|
1696
|
+
if (options.forceFreshDelivery) {
|
|
1697
|
+
this._repairSweepForceFreshPending = true;
|
|
1698
|
+
}
|
|
1699
|
+
for (const peer of options.addedPeers) {
|
|
1700
|
+
this._repairSweepAddedPeersPending.add(peer);
|
|
1701
|
+
}
|
|
1702
|
+
if (!this._repairSweepRunning && !this.closed) {
|
|
1703
|
+
this._repairSweepRunning = true;
|
|
1704
|
+
void this.runRepairSweep();
|
|
1705
|
+
}
|
|
1706
|
+
}
|
|
1707
|
+
async runRepairSweep() {
|
|
1708
|
+
try {
|
|
1709
|
+
while (!this.closed) {
|
|
1710
|
+
const forceFreshDelivery = this._repairSweepForceFreshPending;
|
|
1711
|
+
const addedPeers = new Set(this._repairSweepAddedPeersPending);
|
|
1712
|
+
this._repairSweepForceFreshPending = false;
|
|
1713
|
+
this._repairSweepAddedPeersPending.clear();
|
|
1714
|
+
if (!forceFreshDelivery && addedPeers.size === 0) {
|
|
1715
|
+
return;
|
|
1716
|
+
}
|
|
1717
|
+
const pendingByTarget = new Map();
|
|
1718
|
+
const flushTarget = (target) => {
|
|
1719
|
+
const entries = pendingByTarget.get(target);
|
|
1720
|
+
if (!entries || entries.size === 0) {
|
|
1721
|
+
return;
|
|
1722
|
+
}
|
|
1723
|
+
const isJoinWarmupTarget = addedPeers.has(target);
|
|
1724
|
+
const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
|
|
1725
|
+
this.dispatchMaybeMissingEntries(target, entries, {
|
|
1726
|
+
bypassRecentDedupe,
|
|
1727
|
+
retryScheduleMs: isJoinWarmupTarget
|
|
1728
|
+
? JOIN_WARMUP_RETRY_SCHEDULE_MS
|
|
1729
|
+
: undefined,
|
|
1730
|
+
forceFreshDelivery,
|
|
1731
|
+
});
|
|
1732
|
+
pendingByTarget.delete(target);
|
|
1733
|
+
};
|
|
1734
|
+
const queueEntryForTarget = (target, entry) => {
|
|
1735
|
+
let set = pendingByTarget.get(target);
|
|
1736
|
+
if (!set) {
|
|
1737
|
+
set = new Map();
|
|
1738
|
+
pendingByTarget.set(target, set);
|
|
1739
|
+
}
|
|
1740
|
+
if (set.has(entry.hash)) {
|
|
1741
|
+
return;
|
|
1742
|
+
}
|
|
1743
|
+
set.set(entry.hash, entry);
|
|
1744
|
+
if (set.size >= this.repairSweepTargetBufferSize) {
|
|
1745
|
+
flushTarget(target);
|
|
1746
|
+
}
|
|
1747
|
+
};
|
|
1748
|
+
const iterator = this.entryCoordinatesIndex.iterate({});
|
|
1749
|
+
try {
|
|
1750
|
+
while (!this.closed && !iterator.done()) {
|
|
1751
|
+
const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
|
|
1752
|
+
for (const entry of entries) {
|
|
1753
|
+
const entryReplicated = entry.value;
|
|
1754
|
+
const currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
|
|
1755
|
+
if (forceFreshDelivery) {
|
|
1756
|
+
for (const [currentPeer] of currentPeers) {
|
|
1757
|
+
if (currentPeer === this.node.identity.publicKey.hashcode()) {
|
|
1758
|
+
continue;
|
|
1759
|
+
}
|
|
1760
|
+
queueEntryForTarget(currentPeer, entryReplicated);
|
|
1761
|
+
}
|
|
1762
|
+
}
|
|
1763
|
+
if (addedPeers.size > 0) {
|
|
1764
|
+
for (const peer of addedPeers) {
|
|
1765
|
+
if (currentPeers.has(peer)) {
|
|
1766
|
+
queueEntryForTarget(peer, entryReplicated);
|
|
1767
|
+
}
|
|
1768
|
+
}
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
}
|
|
1772
|
+
}
|
|
1773
|
+
finally {
|
|
1774
|
+
await iterator.close();
|
|
1775
|
+
}
|
|
1776
|
+
for (const target of [...pendingByTarget.keys()]) {
|
|
1777
|
+
flushTarget(target);
|
|
1778
|
+
}
|
|
1779
|
+
}
|
|
1780
|
+
}
|
|
1781
|
+
catch (error) {
|
|
1782
|
+
if (!isNotStartedError(error)) {
|
|
1783
|
+
logger.error(`Repair sweep failed: ${error?.message ?? error}`);
|
|
1784
|
+
}
|
|
1785
|
+
}
|
|
1786
|
+
finally {
|
|
1787
|
+
this._repairSweepRunning = false;
|
|
1788
|
+
if (!this.closed &&
|
|
1789
|
+
(this._repairSweepForceFreshPending ||
|
|
1790
|
+
this._repairSweepAddedPeersPending.size > 0)) {
|
|
1791
|
+
this._repairSweepRunning = true;
|
|
1792
|
+
void this.runRepairSweep();
|
|
1793
|
+
}
|
|
1794
|
+
}
|
|
1795
|
+
}
|
|
952
1796
|
async pruneDebouncedFnAddIfNotKeeping(args) {
|
|
953
1797
|
if (!this.keep || !(await this.keep(args.value.entry))) {
|
|
954
1798
|
return this.pruneDebouncedFn.add(args);
|
|
955
1799
|
}
|
|
956
1800
|
}
|
|
1801
|
+
clearCheckedPruneRetry(hash) {
|
|
1802
|
+
const state = this._checkedPruneRetries.get(hash);
|
|
1803
|
+
if (state?.timer) {
|
|
1804
|
+
clearTimeout(state.timer);
|
|
1805
|
+
}
|
|
1806
|
+
this._checkedPruneRetries.delete(hash);
|
|
1807
|
+
}
|
|
1808
|
+
scheduleCheckedPruneRetry(args) {
|
|
1809
|
+
if (this.closed)
|
|
1810
|
+
return;
|
|
1811
|
+
if (this._pendingDeletes.has(args.entry.hash))
|
|
1812
|
+
return;
|
|
1813
|
+
const hash = args.entry.hash;
|
|
1814
|
+
const state = this._checkedPruneRetries.get(hash) ?? { attempts: 0 };
|
|
1815
|
+
if (state.timer)
|
|
1816
|
+
return;
|
|
1817
|
+
if (state.attempts >= CHECKED_PRUNE_RETRY_MAX_ATTEMPTS) {
|
|
1818
|
+
// Avoid unbounded background retries; a new replication-change event can
|
|
1819
|
+
// always re-enqueue pruning with fresh leader info.
|
|
1820
|
+
return;
|
|
1821
|
+
}
|
|
1822
|
+
const attempt = state.attempts + 1;
|
|
1823
|
+
const jitterMs = Math.floor(Math.random() * 250);
|
|
1824
|
+
const delayMs = Math.min(CHECKED_PRUNE_RETRY_MAX_DELAY_MS, 1_000 * 2 ** (attempt - 1) + jitterMs);
|
|
1825
|
+
state.attempts = attempt;
|
|
1826
|
+
state.timer = setTimeout(async () => {
|
|
1827
|
+
const st = this._checkedPruneRetries.get(hash);
|
|
1828
|
+
if (st)
|
|
1829
|
+
st.timer = undefined;
|
|
1830
|
+
if (this.closed)
|
|
1831
|
+
return;
|
|
1832
|
+
if (this._pendingDeletes.has(hash))
|
|
1833
|
+
return;
|
|
1834
|
+
let leadersMap;
|
|
1835
|
+
try {
|
|
1836
|
+
const replicas = decodeReplicas(args.entry).getValue(this);
|
|
1837
|
+
leadersMap = await this.findLeadersFromEntry(args.entry, replicas, {
|
|
1838
|
+
roleAge: 0,
|
|
1839
|
+
});
|
|
1840
|
+
}
|
|
1841
|
+
catch {
|
|
1842
|
+
// Best-effort only.
|
|
1843
|
+
}
|
|
1844
|
+
if (!leadersMap || leadersMap.size === 0) {
|
|
1845
|
+
if (args.leaders instanceof Map) {
|
|
1846
|
+
leadersMap = args.leaders;
|
|
1847
|
+
}
|
|
1848
|
+
else {
|
|
1849
|
+
leadersMap = new Map();
|
|
1850
|
+
for (const k of args.leaders) {
|
|
1851
|
+
leadersMap.set(k, { intersecting: true });
|
|
1852
|
+
}
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1855
|
+
try {
|
|
1856
|
+
const leadersForRetry = leadersMap ?? new Map();
|
|
1857
|
+
await this.pruneDebouncedFnAddIfNotKeeping({
|
|
1858
|
+
key: hash,
|
|
1859
|
+
// TODO types
|
|
1860
|
+
value: { entry: args.entry, leaders: leadersForRetry },
|
|
1861
|
+
});
|
|
1862
|
+
}
|
|
1863
|
+
catch {
|
|
1864
|
+
// Best-effort only; pruning will be re-attempted on future changes.
|
|
1865
|
+
}
|
|
1866
|
+
}, delayMs);
|
|
1867
|
+
state.timer.unref?.();
|
|
1868
|
+
this._checkedPruneRetries.set(hash, state);
|
|
1869
|
+
}
|
|
957
1870
|
async append(data, options) {
|
|
958
1871
|
const appendOptions = { ...options };
|
|
959
1872
|
const minReplicas = this.getClampedReplicas(options?.replicas
|
|
@@ -1002,239 +1915,18 @@ let SharedLog = (() => {
|
|
|
1002
1915
|
if (options?.target !== "none") {
|
|
1003
1916
|
const target = options?.target;
|
|
1004
1917
|
const deliveryArg = options?.delivery;
|
|
1005
|
-
const
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
? {}
|
|
1009
|
-
: deliveryArg;
|
|
1010
|
-
let requireRecipients = false;
|
|
1011
|
-
let settleMin;
|
|
1012
|
-
let guardDelivery = undefined;
|
|
1013
|
-
let firstDeliveryPromise;
|
|
1014
|
-
let deliveryPromises;
|
|
1015
|
-
let addDeliveryPromise;
|
|
1016
|
-
const leadersForDelivery = delivery && (target === "replicators" || !target)
|
|
1017
|
-
? new Set(leaders.keys())
|
|
1018
|
-
: undefined;
|
|
1019
|
-
if (delivery) {
|
|
1020
|
-
const deliverySettle = delivery.settle ?? true;
|
|
1021
|
-
const deliveryTimeout = delivery.timeout;
|
|
1022
|
-
const deliverySignal = delivery.signal;
|
|
1023
|
-
requireRecipients = delivery.requireRecipients === true;
|
|
1024
|
-
settleMin =
|
|
1025
|
-
typeof deliverySettle === "object" &&
|
|
1026
|
-
Number.isFinite(deliverySettle.min)
|
|
1027
|
-
? Math.max(0, Math.floor(deliverySettle.min))
|
|
1028
|
-
: undefined;
|
|
1029
|
-
guardDelivery =
|
|
1030
|
-
deliveryTimeout == null && deliverySignal == null
|
|
1031
|
-
? undefined
|
|
1032
|
-
: (promise) => new Promise((resolve, reject) => {
|
|
1033
|
-
let settled = false;
|
|
1034
|
-
let timer = undefined;
|
|
1035
|
-
const onAbort = () => {
|
|
1036
|
-
if (settled) {
|
|
1037
|
-
return;
|
|
1038
|
-
}
|
|
1039
|
-
settled = true;
|
|
1040
|
-
promise.catch(() => { });
|
|
1041
|
-
cleanup();
|
|
1042
|
-
reject(new AbortError());
|
|
1043
|
-
};
|
|
1044
|
-
const cleanup = () => {
|
|
1045
|
-
if (timer != null) {
|
|
1046
|
-
clearTimeout(timer);
|
|
1047
|
-
timer = undefined;
|
|
1048
|
-
}
|
|
1049
|
-
deliverySignal?.removeEventListener("abort", onAbort);
|
|
1050
|
-
};
|
|
1051
|
-
if (deliverySignal) {
|
|
1052
|
-
if (deliverySignal.aborted) {
|
|
1053
|
-
onAbort();
|
|
1054
|
-
return;
|
|
1055
|
-
}
|
|
1056
|
-
deliverySignal.addEventListener("abort", onAbort);
|
|
1057
|
-
}
|
|
1058
|
-
if (deliveryTimeout != null) {
|
|
1059
|
-
timer = setTimeout(() => {
|
|
1060
|
-
if (settled) {
|
|
1061
|
-
return;
|
|
1062
|
-
}
|
|
1063
|
-
settled = true;
|
|
1064
|
-
promise.catch(() => { });
|
|
1065
|
-
cleanup();
|
|
1066
|
-
reject(new TimeoutError(`Timeout waiting for delivery`));
|
|
1067
|
-
}, deliveryTimeout);
|
|
1068
|
-
}
|
|
1069
|
-
promise
|
|
1070
|
-
.then(() => {
|
|
1071
|
-
if (settled) {
|
|
1072
|
-
return;
|
|
1073
|
-
}
|
|
1074
|
-
settled = true;
|
|
1075
|
-
cleanup();
|
|
1076
|
-
resolve();
|
|
1077
|
-
})
|
|
1078
|
-
.catch((e) => {
|
|
1079
|
-
if (settled) {
|
|
1080
|
-
return;
|
|
1081
|
-
}
|
|
1082
|
-
settled = true;
|
|
1083
|
-
cleanup();
|
|
1084
|
-
reject(e);
|
|
1085
|
-
});
|
|
1086
|
-
});
|
|
1087
|
-
addDeliveryPromise = (promise) => {
|
|
1088
|
-
if (!firstDeliveryPromise) {
|
|
1089
|
-
firstDeliveryPromise = promise;
|
|
1090
|
-
return;
|
|
1091
|
-
}
|
|
1092
|
-
if (!deliveryPromises) {
|
|
1093
|
-
deliveryPromises = [firstDeliveryPromise, promise];
|
|
1094
|
-
firstDeliveryPromise = undefined;
|
|
1095
|
-
return;
|
|
1096
|
-
}
|
|
1097
|
-
deliveryPromises.push(promise);
|
|
1098
|
-
};
|
|
1918
|
+
const hasDelivery = !(deliveryArg === undefined || deliveryArg === false);
|
|
1919
|
+
if (target === "all" && hasDelivery) {
|
|
1920
|
+
throw new Error(`delivery options are not supported with target="all"; fanout broadcast is fire-and-forward`);
|
|
1099
1921
|
}
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
])) {
|
|
1103
|
-
if (target === "replicators" || !target) {
|
|
1104
|
-
if (message.heads[0].gidRefrences.length > 0) {
|
|
1105
|
-
for (const ref of message.heads[0].gidRefrences) {
|
|
1106
|
-
const entryFromGid = this.log.entryIndex.getHeads(ref, false);
|
|
1107
|
-
for (const entry of await entryFromGid.all()) {
|
|
1108
|
-
let coordinates = await this.getCoordinates(entry);
|
|
1109
|
-
if (coordinates == null) {
|
|
1110
|
-
coordinates = await this.createCoordinates(entry, minReplicasValue);
|
|
1111
|
-
// TODO are we every to come here?
|
|
1112
|
-
}
|
|
1113
|
-
const result = await this._findLeaders(coordinates);
|
|
1114
|
-
for (const [k, v] of result) {
|
|
1115
|
-
leaders.set(k, v);
|
|
1116
|
-
}
|
|
1117
|
-
}
|
|
1118
|
-
}
|
|
1119
|
-
}
|
|
1120
|
-
const set = this.addPeersToGidPeerHistory(result.entry.meta.gid, leaders.keys());
|
|
1121
|
-
let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
1122
|
-
if (!hasRemotePeers) {
|
|
1123
|
-
if (requireRecipients) {
|
|
1124
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1125
|
-
}
|
|
1126
|
-
continue;
|
|
1127
|
-
}
|
|
1128
|
-
if (!delivery) {
|
|
1129
|
-
this.rpc
|
|
1130
|
-
.send(message, {
|
|
1131
|
-
mode: isLeader
|
|
1132
|
-
? new SilentDelivery({ redundancy: 1, to: set })
|
|
1133
|
-
: new AcknowledgeDelivery({ redundancy: 1, to: set }),
|
|
1134
|
-
})
|
|
1135
|
-
.catch((e) => logger.error(e));
|
|
1136
|
-
continue;
|
|
1137
|
-
}
|
|
1138
|
-
let expectedRemoteRecipientsCount = 0;
|
|
1139
|
-
const ackTo = [];
|
|
1140
|
-
let silentTo;
|
|
1141
|
-
const ackLimit = settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
|
|
1142
|
-
// Always settle towards the current expected replicators for this entry,
|
|
1143
|
-
// not the entire gid peer history.
|
|
1144
|
-
for (const peer of leadersForDelivery) {
|
|
1145
|
-
if (peer === selfHash) {
|
|
1146
|
-
continue;
|
|
1147
|
-
}
|
|
1148
|
-
expectedRemoteRecipientsCount++;
|
|
1149
|
-
if (ackTo.length < ackLimit) {
|
|
1150
|
-
ackTo.push(peer);
|
|
1151
|
-
}
|
|
1152
|
-
else {
|
|
1153
|
-
silentTo ||= [];
|
|
1154
|
-
silentTo.push(peer);
|
|
1155
|
-
}
|
|
1156
|
-
}
|
|
1157
|
-
// Still deliver to known peers for the gid (best-effort), but don't let them
|
|
1158
|
-
// satisfy the settle requirement.
|
|
1159
|
-
for (const peer of set) {
|
|
1160
|
-
if (peer === selfHash) {
|
|
1161
|
-
continue;
|
|
1162
|
-
}
|
|
1163
|
-
if (leadersForDelivery.has(peer)) {
|
|
1164
|
-
continue;
|
|
1165
|
-
}
|
|
1166
|
-
silentTo ||= [];
|
|
1167
|
-
silentTo.push(peer);
|
|
1168
|
-
}
|
|
1169
|
-
if (requireRecipients && expectedRemoteRecipientsCount === 0) {
|
|
1170
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1171
|
-
}
|
|
1172
|
-
if (requireRecipients &&
|
|
1173
|
-
ackTo.length + (silentTo?.length || 0) === 0) {
|
|
1174
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1175
|
-
}
|
|
1176
|
-
if (ackTo.length > 0) {
|
|
1177
|
-
const promise = this.rpc.send(message, {
|
|
1178
|
-
mode: new AcknowledgeDelivery({
|
|
1179
|
-
redundancy: 1,
|
|
1180
|
-
to: ackTo,
|
|
1181
|
-
}),
|
|
1182
|
-
});
|
|
1183
|
-
addDeliveryPromise(guardDelivery ? guardDelivery(promise) : promise);
|
|
1184
|
-
}
|
|
1185
|
-
if (silentTo?.length) {
|
|
1186
|
-
this.rpc
|
|
1187
|
-
.send(message, {
|
|
1188
|
-
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
1189
|
-
})
|
|
1190
|
-
.catch((e) => logger.error(e));
|
|
1191
|
-
}
|
|
1192
|
-
}
|
|
1193
|
-
else {
|
|
1194
|
-
if (!delivery) {
|
|
1195
|
-
this.rpc.send(message).catch((e) => logger.error(e));
|
|
1196
|
-
continue;
|
|
1197
|
-
}
|
|
1198
|
-
const subscribers = await this.node.services.pubsub.getSubscribers(this.rpc.topic);
|
|
1199
|
-
const ackTo = [];
|
|
1200
|
-
let silentTo;
|
|
1201
|
-
const ackLimit = settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
|
|
1202
|
-
for (const subscriber of subscribers || []) {
|
|
1203
|
-
if (subscriber.hashcode() === selfHash) {
|
|
1204
|
-
continue;
|
|
1205
|
-
}
|
|
1206
|
-
if (ackTo.length < ackLimit) {
|
|
1207
|
-
ackTo.push(subscriber);
|
|
1208
|
-
}
|
|
1209
|
-
else {
|
|
1210
|
-
silentTo ||= [];
|
|
1211
|
-
silentTo.push(subscriber);
|
|
1212
|
-
}
|
|
1213
|
-
}
|
|
1214
|
-
if (requireRecipients &&
|
|
1215
|
-
ackTo.length + (silentTo?.length || 0) === 0) {
|
|
1216
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1217
|
-
}
|
|
1218
|
-
if (ackTo.length > 0) {
|
|
1219
|
-
const promise = this.rpc.send(message, {
|
|
1220
|
-
mode: new AcknowledgeDelivery({ redundancy: 1, to: ackTo }),
|
|
1221
|
-
});
|
|
1222
|
-
addDeliveryPromise(guardDelivery ? guardDelivery(promise) : promise);
|
|
1223
|
-
}
|
|
1224
|
-
if (silentTo?.length) {
|
|
1225
|
-
this.rpc
|
|
1226
|
-
.send(message, {
|
|
1227
|
-
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
1228
|
-
})
|
|
1229
|
-
.catch((e) => logger.error(e));
|
|
1230
|
-
}
|
|
1231
|
-
}
|
|
1922
|
+
if (target === "all" && !this._fanoutChannel) {
|
|
1923
|
+
throw new Error(`No fanout channel configured for shared-log topic ${this.topic}`);
|
|
1232
1924
|
}
|
|
1233
|
-
if (
|
|
1234
|
-
await
|
|
1925
|
+
if (target === "all") {
|
|
1926
|
+
await this._appendDeliverToAllFanout(result.entry);
|
|
1235
1927
|
}
|
|
1236
|
-
else
|
|
1237
|
-
await
|
|
1928
|
+
else {
|
|
1929
|
+
await this._appendDeliverToReplicators(result.entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
|
|
1238
1930
|
}
|
|
1239
1931
|
}
|
|
1240
1932
|
if (!isLeader) {
|
|
@@ -1269,14 +1961,24 @@ let SharedLog = (() => {
|
|
|
1269
1961
|
this._pendingDeletes = new Map();
|
|
1270
1962
|
this._pendingIHave = new Map();
|
|
1271
1963
|
this.latestReplicationInfoMessage = new Map();
|
|
1964
|
+
this._replicationInfoBlockedPeers = new Set();
|
|
1965
|
+
this._replicationInfoRequestByPeer = new Map();
|
|
1966
|
+
this._replicationInfoApplyQueueByPeer = new Map();
|
|
1967
|
+
this._repairRetryTimers = new Set();
|
|
1968
|
+
this._recentRepairDispatch = new Map();
|
|
1969
|
+
this._repairSweepRunning = false;
|
|
1970
|
+
this._repairSweepForceFreshPending = false;
|
|
1971
|
+
this._repairSweepAddedPeersPending = new Set();
|
|
1272
1972
|
this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
|
|
1273
1973
|
this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
|
|
1274
1974
|
this.uniqueReplicators = new Set();
|
|
1975
|
+
this._replicatorJoinEmitted = new Set();
|
|
1275
1976
|
this._replicatorsReconciled = false;
|
|
1276
1977
|
this.openTime = +new Date();
|
|
1277
1978
|
this.oldestOpenTime = this.openTime;
|
|
1278
1979
|
this.distributionDebounceTime =
|
|
1279
1980
|
options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
|
|
1981
|
+
this.repairSweepTargetBufferSize = toPositiveInteger(options?.sync?.repairSweepTargetBufferSize, REPAIR_SWEEP_TARGET_BUFFER_SIZE, "sync.repairSweepTargetBufferSize");
|
|
1280
1982
|
this.timeUntilRoleMaturity =
|
|
1281
1983
|
options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
|
|
1282
1984
|
this.waitForReplicatorTimeout =
|
|
@@ -1298,18 +2000,70 @@ let SharedLog = (() => {
|
|
|
1298
2000
|
throw new Error("waitForReplicatorRequestMaxAttempts must be a positive number");
|
|
1299
2001
|
}
|
|
1300
2002
|
this._closeController = new AbortController();
|
|
2003
|
+
this._closeController.signal.addEventListener("abort", () => {
|
|
2004
|
+
for (const [_peer, state] of this._replicationInfoRequestByPeer) {
|
|
2005
|
+
if (state.timer)
|
|
2006
|
+
clearTimeout(state.timer);
|
|
2007
|
+
}
|
|
2008
|
+
this._replicationInfoRequestByPeer.clear();
|
|
2009
|
+
});
|
|
1301
2010
|
this._isTrustedReplicator = options?.canReplicate;
|
|
1302
2011
|
this.keep = options?.keep;
|
|
1303
2012
|
this.pendingMaturity = new Map();
|
|
1304
2013
|
const id = sha256Base64Sync(this.log.id);
|
|
1305
2014
|
const storage = await this.node.storage.sublevel(id);
|
|
1306
2015
|
const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
|
|
2016
|
+
const fanoutService = this.node.services.fanout;
|
|
2017
|
+
const blockProviderNamespace = (cid) => `cid:${cid}`;
|
|
1307
2018
|
this.remoteBlocks = new RemoteBlocks({
|
|
1308
2019
|
local: localBlocks,
|
|
1309
|
-
publish: (message, options) => this.rpc.send(new BlocksMessage(message), options
|
|
2020
|
+
publish: (message, options) => this.rpc.send(new BlocksMessage(message), options),
|
|
1310
2021
|
waitFor: this.rpc.waitFor.bind(this.rpc),
|
|
1311
2022
|
publicKey: this.node.identity.publicKey,
|
|
1312
2023
|
eagerBlocks: options?.eagerBlocks ?? true,
|
|
2024
|
+
resolveProviders: async (cid, opts) => {
|
|
2025
|
+
// 1) tracker-backed provider directory (best-effort, bounded)
|
|
2026
|
+
try {
|
|
2027
|
+
const providers = await fanoutService?.queryProviders(blockProviderNamespace(cid), {
|
|
2028
|
+
want: 8,
|
|
2029
|
+
timeoutMs: 2_000,
|
|
2030
|
+
queryTimeoutMs: 500,
|
|
2031
|
+
bootstrapMaxPeers: 2,
|
|
2032
|
+
signal: opts?.signal,
|
|
2033
|
+
});
|
|
2034
|
+
if (providers && providers.length > 0)
|
|
2035
|
+
return providers;
|
|
2036
|
+
}
|
|
2037
|
+
catch {
|
|
2038
|
+
// ignore discovery failures
|
|
2039
|
+
}
|
|
2040
|
+
// 2) fallback to currently connected RPC peers
|
|
2041
|
+
const self = this.node.identity.publicKey.hashcode();
|
|
2042
|
+
const out = [];
|
|
2043
|
+
const peers = this.rpc?.peers;
|
|
2044
|
+
for (const h of peers?.keys?.() ?? []) {
|
|
2045
|
+
if (h === self)
|
|
2046
|
+
continue;
|
|
2047
|
+
if (out.includes(h))
|
|
2048
|
+
continue;
|
|
2049
|
+
out.push(h);
|
|
2050
|
+
if (out.length >= 32)
|
|
2051
|
+
break;
|
|
2052
|
+
}
|
|
2053
|
+
return out;
|
|
2054
|
+
},
|
|
2055
|
+
onPut: async (cid) => {
|
|
2056
|
+
// Best-effort directory announce for "get without remote.from" workflows.
|
|
2057
|
+
try {
|
|
2058
|
+
await fanoutService?.announceProvider(blockProviderNamespace(cid), {
|
|
2059
|
+
ttlMs: 120_000,
|
|
2060
|
+
bootstrapMaxPeers: 2,
|
|
2061
|
+
});
|
|
2062
|
+
}
|
|
2063
|
+
catch {
|
|
2064
|
+
// ignore announce failures
|
|
2065
|
+
}
|
|
2066
|
+
},
|
|
1313
2067
|
});
|
|
1314
2068
|
await this.remoteBlocks.start();
|
|
1315
2069
|
const logScope = await this.node.indexer.scope(id);
|
|
@@ -1332,6 +2086,7 @@ let SharedLog = (() => {
|
|
|
1332
2086
|
this._gidPeersHistory = new Map();
|
|
1333
2087
|
this._requestIPruneSent = new Map();
|
|
1334
2088
|
this._requestIPruneResponseReplicatorSet = new Map();
|
|
2089
|
+
this._checkedPruneRetries = new Map();
|
|
1335
2090
|
this.replicationChangeDebounceFn = debounceAggregationChanges((change) => this.onReplicationChange(change).then(() => this.rebalanceParticipationDebounced?.call()), this.distributionDebounceTime);
|
|
1336
2091
|
this.pruneDebouncedFn = debouncedAccumulatorMap((map) => {
|
|
1337
2092
|
this.prune(map);
|
|
@@ -1389,6 +2144,83 @@ let SharedLog = (() => {
|
|
|
1389
2144
|
}, PRUNE_DEBOUNCE_INTERVAL);
|
|
1390
2145
|
await this.log.open(this.remoteBlocks, this.node.identity, {
|
|
1391
2146
|
keychain: this.node.services.keychain,
|
|
2147
|
+
resolveRemotePeers: async (hash, options) => {
|
|
2148
|
+
if (options?.signal?.aborted)
|
|
2149
|
+
return undefined;
|
|
2150
|
+
const maxPeers = 8;
|
|
2151
|
+
const self = this.node.identity.publicKey.hashcode();
|
|
2152
|
+
const seed = hashToSeed32(hash);
|
|
2153
|
+
// Best hint: peers that have recently confirmed having this entry hash.
|
|
2154
|
+
const hinted = this._requestIPruneResponseReplicatorSet.get(hash);
|
|
2155
|
+
if (hinted && hinted.size > 0) {
|
|
2156
|
+
const peers = [...hinted].filter((p) => p !== self);
|
|
2157
|
+
return peers.length > 0
|
|
2158
|
+
? pickDeterministicSubset(peers, seed, maxPeers)
|
|
2159
|
+
: undefined;
|
|
2160
|
+
}
|
|
2161
|
+
// Next: peers we already contacted about this hash (may still have it).
|
|
2162
|
+
const contacted = this._requestIPruneSent.get(hash);
|
|
2163
|
+
if (contacted && contacted.size > 0) {
|
|
2164
|
+
const peers = [...contacted].filter((p) => p !== self);
|
|
2165
|
+
return peers.length > 0
|
|
2166
|
+
? pickDeterministicSubset(peers, seed, maxPeers)
|
|
2167
|
+
: undefined;
|
|
2168
|
+
}
|
|
2169
|
+
let candidates;
|
|
2170
|
+
// Prefer the replicator cache; fall back to subscribers if we have no other signal.
|
|
2171
|
+
const replicatorCandidates = [...this.uniqueReplicators].filter((p) => p !== self);
|
|
2172
|
+
if (replicatorCandidates.length > 0) {
|
|
2173
|
+
candidates = replicatorCandidates;
|
|
2174
|
+
}
|
|
2175
|
+
else {
|
|
2176
|
+
try {
|
|
2177
|
+
const subscribers = await this._getTopicSubscribers(this.topic);
|
|
2178
|
+
const subscriberCandidates = subscribers?.map((k) => k.hashcode()).filter((p) => p !== self) ??
|
|
2179
|
+
[];
|
|
2180
|
+
candidates =
|
|
2181
|
+
subscriberCandidates.length > 0 ? subscriberCandidates : undefined;
|
|
2182
|
+
}
|
|
2183
|
+
catch {
|
|
2184
|
+
// Best-effort only.
|
|
2185
|
+
}
|
|
2186
|
+
if (!candidates || candidates.length === 0) {
|
|
2187
|
+
// Last resort: peers we are already directly connected to. This avoids
|
|
2188
|
+
// depending on global membership knowledge in early-join scenarios.
|
|
2189
|
+
const peerMap = this.node.services.pubsub?.peers;
|
|
2190
|
+
if (peerMap?.keys) {
|
|
2191
|
+
candidates = [...peerMap.keys()];
|
|
2192
|
+
}
|
|
2193
|
+
}
|
|
2194
|
+
if (!candidates || candidates.length === 0) {
|
|
2195
|
+
// Even if the pubsub stream has no established peer streams yet, we may
|
|
2196
|
+
// still have a libp2p connection to one or more peers (e.g. bootstrap).
|
|
2197
|
+
const connectionManager = this.node.services.pubsub?.components
|
|
2198
|
+
?.connectionManager;
|
|
2199
|
+
const connections = connectionManager?.getConnections?.() ?? [];
|
|
2200
|
+
const connectionHashes = [];
|
|
2201
|
+
for (const conn of connections) {
|
|
2202
|
+
const peerId = conn?.remotePeer;
|
|
2203
|
+
if (!peerId)
|
|
2204
|
+
continue;
|
|
2205
|
+
try {
|
|
2206
|
+
connectionHashes.push(getPublicKeyFromPeerId(peerId).hashcode());
|
|
2207
|
+
}
|
|
2208
|
+
catch {
|
|
2209
|
+
// Best-effort only.
|
|
2210
|
+
}
|
|
2211
|
+
}
|
|
2212
|
+
if (connectionHashes.length > 0) {
|
|
2213
|
+
candidates = connectionHashes;
|
|
2214
|
+
}
|
|
2215
|
+
}
|
|
2216
|
+
}
|
|
2217
|
+
if (!candidates || candidates.length === 0)
|
|
2218
|
+
return undefined;
|
|
2219
|
+
const peers = candidates.filter((p) => p !== self);
|
|
2220
|
+
if (peers.length === 0)
|
|
2221
|
+
return undefined;
|
|
2222
|
+
return pickDeterministicSubset(peers, seed, maxPeers);
|
|
2223
|
+
},
|
|
1392
2224
|
...this._logProperties,
|
|
1393
2225
|
onChange: async (change) => {
|
|
1394
2226
|
await this.onChange(change);
|
|
@@ -1456,6 +2288,7 @@ let SharedLog = (() => {
|
|
|
1456
2288
|
this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
|
|
1457
2289
|
await this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn);
|
|
1458
2290
|
await this.rpc.subscribe();
|
|
2291
|
+
await this._openFanoutChannel(options?.fanout);
|
|
1459
2292
|
// mark all our replication ranges as "new", this would allow other peers to understand that we recently reopened our database and might need some sync and warmup
|
|
1460
2293
|
await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
|
|
1461
2294
|
// if we had a previous session with replication info, and new replication info dictates that we unreplicate
|
|
@@ -1518,7 +2351,7 @@ let SharedLog = (() => {
|
|
|
1518
2351
|
});
|
|
1519
2352
|
await this.rebalanceParticipation();
|
|
1520
2353
|
// Take into account existing subscription
|
|
1521
|
-
(await this.
|
|
2354
|
+
(await this._getTopicSubscribers(this.topic))?.forEach((v) => {
|
|
1522
2355
|
if (v.equals(this.node.identity.publicKey)) {
|
|
1523
2356
|
return;
|
|
1524
2357
|
}
|
|
@@ -1551,18 +2384,22 @@ let SharedLog = (() => {
|
|
|
1551
2384
|
})
|
|
1552
2385
|
.then(async () => {
|
|
1553
2386
|
// is reachable, announce change events
|
|
1554
|
-
const key = await this.
|
|
2387
|
+
const key = await this._resolvePublicKeyFromHash(segment.value.hash);
|
|
1555
2388
|
if (!key) {
|
|
1556
2389
|
throw new Error("Failed to resolve public key from hash: " +
|
|
1557
2390
|
segment.value.hash);
|
|
1558
2391
|
}
|
|
1559
|
-
|
|
1560
|
-
this.
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
2392
|
+
const keyHash = key.hashcode();
|
|
2393
|
+
this.uniqueReplicators.add(keyHash);
|
|
2394
|
+
if (!this._replicatorJoinEmitted.has(keyHash)) {
|
|
2395
|
+
this._replicatorJoinEmitted.add(keyHash);
|
|
2396
|
+
this.events.dispatchEvent(new CustomEvent("replicator:join", {
|
|
2397
|
+
detail: { publicKey: key },
|
|
2398
|
+
}));
|
|
2399
|
+
this.events.dispatchEvent(new CustomEvent("replication:change", {
|
|
2400
|
+
detail: { publicKey: key },
|
|
2401
|
+
}));
|
|
2402
|
+
}
|
|
1566
2403
|
})
|
|
1567
2404
|
.catch(async (e) => {
|
|
1568
2405
|
if (isNotStartedError(e)) {
|
|
@@ -1672,26 +2509,33 @@ let SharedLog = (() => {
|
|
|
1672
2509
|
for (const [key, _] of this.syncronizer.syncInFlight) {
|
|
1673
2510
|
set.add(key);
|
|
1674
2511
|
}
|
|
2512
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
1675
2513
|
if (options?.reachableOnly) {
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
//
|
|
1679
|
-
//
|
|
1680
|
-
|
|
1681
|
-
|
|
2514
|
+
const directPeers = this.node.services
|
|
2515
|
+
.pubsub?.peers;
|
|
2516
|
+
// Prefer the live pubsub subscriber set when filtering reachability. In some
|
|
2517
|
+
// flows peers can be reachable/active even before (or without) subscriber
|
|
2518
|
+
// state converging, so also consider direct pubsub peers.
|
|
2519
|
+
const subscribers = (await this._getTopicSubscribers(this.topic)) ?? undefined;
|
|
1682
2520
|
const subscriberHashcodes = subscribers
|
|
1683
2521
|
? new Set(subscribers.map((key) => key.hashcode()))
|
|
1684
2522
|
: undefined;
|
|
2523
|
+
// If reachability is requested but we have no basis for filtering yet
|
|
2524
|
+
// (subscriber snapshot hasn't converged), return the full cover set.
|
|
2525
|
+
// Otherwise, only keep peers we can currently reach.
|
|
2526
|
+
const canFilter = directPeers != null ||
|
|
2527
|
+
(subscriberHashcodes && subscriberHashcodes.size > 0);
|
|
2528
|
+
if (!canFilter) {
|
|
2529
|
+
return [...set];
|
|
2530
|
+
}
|
|
1685
2531
|
const reachable = [];
|
|
1686
|
-
const selfHash = this.node.identity.publicKey.hashcode();
|
|
1687
2532
|
for (const peer of set) {
|
|
1688
2533
|
if (peer === selfHash) {
|
|
1689
2534
|
reachable.push(peer);
|
|
1690
2535
|
continue;
|
|
1691
2536
|
}
|
|
1692
|
-
if (subscriberHashcodes
|
|
1693
|
-
|
|
1694
|
-
: this.uniqueReplicators.has(peer)) {
|
|
2537
|
+
if ((subscriberHashcodes && subscriberHashcodes.has(peer)) ||
|
|
2538
|
+
(directPeers && directPeers.has(peer))) {
|
|
1695
2539
|
reachable.push(peer);
|
|
1696
2540
|
}
|
|
1697
2541
|
}
|
|
@@ -1716,6 +2560,14 @@ let SharedLog = (() => {
|
|
|
1716
2560
|
}
|
|
1717
2561
|
this.pendingMaturity.clear();
|
|
1718
2562
|
this.distributeQueue?.clear();
|
|
2563
|
+
this._closeFanoutChannel();
|
|
2564
|
+
try {
|
|
2565
|
+
this._providerHandle?.close();
|
|
2566
|
+
}
|
|
2567
|
+
catch {
|
|
2568
|
+
// ignore
|
|
2569
|
+
}
|
|
2570
|
+
this._providerHandle = undefined;
|
|
1719
2571
|
this.coordinateToHash.clear();
|
|
1720
2572
|
this.recentlyRebalanced.clear();
|
|
1721
2573
|
this.uniqueReplicators.clear();
|
|
@@ -1723,6 +2575,14 @@ let SharedLog = (() => {
|
|
|
1723
2575
|
clearInterval(this.interval);
|
|
1724
2576
|
this.node.services.pubsub.removeEventListener("subscribe", this._onSubscriptionFn);
|
|
1725
2577
|
this.node.services.pubsub.removeEventListener("unsubscribe", this._onUnsubscriptionFn);
|
|
2578
|
+
for (const timer of this._repairRetryTimers) {
|
|
2579
|
+
clearTimeout(timer);
|
|
2580
|
+
}
|
|
2581
|
+
this._repairRetryTimers.clear();
|
|
2582
|
+
this._recentRepairDispatch.clear();
|
|
2583
|
+
this._repairSweepRunning = false;
|
|
2584
|
+
this._repairSweepForceFreshPending = false;
|
|
2585
|
+
this._repairSweepAddedPeersPending.clear();
|
|
1726
2586
|
for (const [_k, v] of this._pendingDeletes) {
|
|
1727
2587
|
v.clear();
|
|
1728
2588
|
v.promise.resolve(); // TODO or reject?
|
|
@@ -1730,13 +2590,24 @@ let SharedLog = (() => {
|
|
|
1730
2590
|
for (const [_k, v] of this._pendingIHave) {
|
|
1731
2591
|
v.clear();
|
|
1732
2592
|
}
|
|
2593
|
+
for (const [_k, v] of this._checkedPruneRetries) {
|
|
2594
|
+
if (v.timer)
|
|
2595
|
+
clearTimeout(v.timer);
|
|
2596
|
+
}
|
|
1733
2597
|
await this.remoteBlocks.stop();
|
|
1734
2598
|
this._pendingDeletes.clear();
|
|
1735
2599
|
this._pendingIHave.clear();
|
|
2600
|
+
this._checkedPruneRetries.clear();
|
|
1736
2601
|
this.latestReplicationInfoMessage.clear();
|
|
1737
2602
|
this._gidPeersHistory.clear();
|
|
1738
2603
|
this._requestIPruneSent.clear();
|
|
1739
2604
|
this._requestIPruneResponseReplicatorSet.clear();
|
|
2605
|
+
// Cancel any pending debounced timers so they can't fire after we've torn down
|
|
2606
|
+
// indexes/RPC state.
|
|
2607
|
+
this.rebalanceParticipationDebounced?.close();
|
|
2608
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
2609
|
+
this.pruneDebouncedFn?.close?.();
|
|
2610
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
1740
2611
|
this.pruneDebouncedFn = undefined;
|
|
1741
2612
|
this.rebalanceParticipationDebounced = undefined;
|
|
1742
2613
|
this._replicationRangeIndex.stop();
|
|
@@ -1747,6 +2618,53 @@ let SharedLog = (() => {
|
|
|
1747
2618
|
/* this._totalParticipation = 0; */
|
|
1748
2619
|
}
|
|
1749
2620
|
async close(from) {
|
|
2621
|
+
// Best-effort: announce that we are going offline before tearing down
|
|
2622
|
+
// RPC/subscription state.
|
|
2623
|
+
//
|
|
2624
|
+
// Important: do not delete our local replication ranges here. Keeping them
|
|
2625
|
+
// allows `replicate: { type: "resume" }` to restore the previous role on
|
|
2626
|
+
// restart. Explicit `unreplicate()` still clears local state.
|
|
2627
|
+
try {
|
|
2628
|
+
if (!this.closed) {
|
|
2629
|
+
// Prevent any late debounced timers (rebalance/prune) from publishing
|
|
2630
|
+
// replication info after we announce "segments: []". These races can leave
|
|
2631
|
+
// stale segments on remotes after rapid open/close cycles.
|
|
2632
|
+
this._isReplicating = false;
|
|
2633
|
+
this._isAdaptiveReplicating = false;
|
|
2634
|
+
this.rebalanceParticipationDebounced?.close();
|
|
2635
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
2636
|
+
this.pruneDebouncedFn?.close?.();
|
|
2637
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
2638
|
+
// Ensure the "I'm leaving" replication reset is actually published before
|
|
2639
|
+
// the RPC child program closes and unsubscribes from its topic. If we fire
|
|
2640
|
+
// and forget here, the publish can race with `super.close()` and get dropped,
|
|
2641
|
+
// leaving stale replication segments on remotes (flaky join/leave tests).
|
|
2642
|
+
// Also ensure close is bounded even when shard overlays are mid-reconcile.
|
|
2643
|
+
const abort = new AbortController();
|
|
2644
|
+
const abortTimer = setTimeout(() => {
|
|
2645
|
+
try {
|
|
2646
|
+
abort.abort(new TimeoutError("shared-log close replication reset timed out"));
|
|
2647
|
+
}
|
|
2648
|
+
catch {
|
|
2649
|
+
abort.abort();
|
|
2650
|
+
}
|
|
2651
|
+
}, 2_000);
|
|
2652
|
+
try {
|
|
2653
|
+
await this.rpc
|
|
2654
|
+
.send(new AllReplicatingSegmentsMessage({ segments: [] }), {
|
|
2655
|
+
priority: 1,
|
|
2656
|
+
signal: abort.signal,
|
|
2657
|
+
})
|
|
2658
|
+
.catch(() => { });
|
|
2659
|
+
}
|
|
2660
|
+
finally {
|
|
2661
|
+
clearTimeout(abortTimer);
|
|
2662
|
+
}
|
|
2663
|
+
}
|
|
2664
|
+
}
|
|
2665
|
+
catch {
|
|
2666
|
+
// ignore: close should be resilient even if we were never fully started
|
|
2667
|
+
}
|
|
1750
2668
|
const superClosed = await super.close(from);
|
|
1751
2669
|
if (!superClosed) {
|
|
1752
2670
|
return superClosed;
|
|
@@ -1756,6 +2674,41 @@ let SharedLog = (() => {
|
|
|
1756
2674
|
return true;
|
|
1757
2675
|
}
|
|
1758
2676
|
async drop(from) {
|
|
2677
|
+
// Best-effort: announce that we are going offline before tearing down
|
|
2678
|
+
// RPC/subscription state (same reasoning as in `close()`).
|
|
2679
|
+
try {
|
|
2680
|
+
if (!this.closed) {
|
|
2681
|
+
this._isReplicating = false;
|
|
2682
|
+
this._isAdaptiveReplicating = false;
|
|
2683
|
+
this.rebalanceParticipationDebounced?.close();
|
|
2684
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
2685
|
+
this.pruneDebouncedFn?.close?.();
|
|
2686
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
2687
|
+
const abort = new AbortController();
|
|
2688
|
+
const abortTimer = setTimeout(() => {
|
|
2689
|
+
try {
|
|
2690
|
+
abort.abort(new TimeoutError("shared-log drop replication reset timed out"));
|
|
2691
|
+
}
|
|
2692
|
+
catch {
|
|
2693
|
+
abort.abort();
|
|
2694
|
+
}
|
|
2695
|
+
}, 2_000);
|
|
2696
|
+
try {
|
|
2697
|
+
await this.rpc
|
|
2698
|
+
.send(new AllReplicatingSegmentsMessage({ segments: [] }), {
|
|
2699
|
+
priority: 1,
|
|
2700
|
+
signal: abort.signal,
|
|
2701
|
+
})
|
|
2702
|
+
.catch(() => { });
|
|
2703
|
+
}
|
|
2704
|
+
finally {
|
|
2705
|
+
clearTimeout(abortTimer);
|
|
2706
|
+
}
|
|
2707
|
+
}
|
|
2708
|
+
}
|
|
2709
|
+
catch {
|
|
2710
|
+
// ignore: drop should be resilient even if we were never fully started
|
|
2711
|
+
}
|
|
1759
2712
|
const superDropped = await super.drop(from);
|
|
1760
2713
|
if (!superDropped) {
|
|
1761
2714
|
return superDropped;
|
|
@@ -2041,7 +2994,7 @@ let SharedLog = (() => {
|
|
|
2041
2994
|
const segments = (await this.getMyReplicationSegments()).map((x) => x.toReplicationRange());
|
|
2042
2995
|
this.rpc
|
|
2043
2996
|
.send(new AllReplicatingSegmentsMessage({ segments }), {
|
|
2044
|
-
mode: new
|
|
2997
|
+
mode: new AcknowledgeDelivery({ to: [context.from], redundancy: 1 }),
|
|
2045
2998
|
})
|
|
2046
2999
|
.catch((e) => logger.error(e.toString()));
|
|
2047
3000
|
// for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
|
|
@@ -2072,33 +3025,55 @@ let SharedLog = (() => {
|
|
|
2072
3025
|
// `Program.waitFor()`. Dropping these messages can lead to missing replicator info
|
|
2073
3026
|
// (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
|
|
2074
3027
|
const from = context.from;
|
|
3028
|
+
const fromHash = from.hashcode();
|
|
3029
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
3030
|
+
return;
|
|
3031
|
+
}
|
|
2075
3032
|
const messageTimestamp = context.message.header.timestamp;
|
|
2076
|
-
(async () => {
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
3033
|
+
await this.withReplicationInfoApplyQueue(fromHash, async () => {
|
|
3034
|
+
try {
|
|
3035
|
+
// The peer may have unsubscribed after this message was queued.
|
|
3036
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
3037
|
+
return;
|
|
3038
|
+
}
|
|
3039
|
+
// Process in-order to avoid races where repeated reset messages arrive
|
|
3040
|
+
// concurrently and trigger spurious "added" diffs / rebalancing.
|
|
3041
|
+
const prev = this.latestReplicationInfoMessage.get(fromHash);
|
|
3042
|
+
if (prev && prev > messageTimestamp) {
|
|
3043
|
+
return;
|
|
3044
|
+
}
|
|
3045
|
+
this.latestReplicationInfoMessage.set(fromHash, messageTimestamp);
|
|
3046
|
+
if (this.closed) {
|
|
3047
|
+
return;
|
|
3048
|
+
}
|
|
3049
|
+
const reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
3050
|
+
await this.addReplicationRange(replicationInfoMessage.segments.map((x) => x.toReplicationRangeIndexable(from)), from, {
|
|
3051
|
+
reset,
|
|
3052
|
+
checkDuplicates: true,
|
|
3053
|
+
timestamp: Number(messageTimestamp),
|
|
3054
|
+
});
|
|
3055
|
+
// If the peer reports any replication segments, stop re-requesting.
|
|
3056
|
+
// (Empty reports can be transient during startup.)
|
|
3057
|
+
if (replicationInfoMessage.segments.length > 0) {
|
|
3058
|
+
this.cancelReplicationInfoRequests(fromHash);
|
|
3059
|
+
}
|
|
2084
3060
|
}
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
});
|
|
2091
|
-
})().catch((e) => {
|
|
2092
|
-
if (isNotStartedError(e)) {
|
|
2093
|
-
return;
|
|
3061
|
+
catch (e) {
|
|
3062
|
+
if (isNotStartedError(e)) {
|
|
3063
|
+
return;
|
|
3064
|
+
}
|
|
3065
|
+
logger.error(`Failed to apply replication settings from '${fromHash}': ${e?.message ?? e}`);
|
|
2094
3066
|
}
|
|
2095
|
-
logger.error(`Failed to apply replication settings from '${from.hashcode()}': ${e?.message ?? e}`);
|
|
2096
3067
|
});
|
|
2097
3068
|
}
|
|
2098
3069
|
else if (msg instanceof StoppedReplicating) {
|
|
2099
3070
|
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2100
3071
|
return;
|
|
2101
3072
|
}
|
|
3073
|
+
const fromHash = context.from.hashcode();
|
|
3074
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
3075
|
+
return;
|
|
3076
|
+
}
|
|
2102
3077
|
const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(msg.segmentIds, context.from);
|
|
2103
3078
|
await this.removeReplicationRanges(rangesToRemove, context.from);
|
|
2104
3079
|
const timestamp = BigInt(+new Date());
|
|
@@ -2363,12 +3338,17 @@ let SharedLog = (() => {
|
|
|
2363
3338
|
requestTimer = undefined;
|
|
2364
3339
|
}
|
|
2365
3340
|
};
|
|
2366
|
-
const resolve = () => {
|
|
3341
|
+
const resolve = async () => {
|
|
2367
3342
|
if (settled) {
|
|
2368
3343
|
return;
|
|
2369
3344
|
}
|
|
2370
3345
|
settled = true;
|
|
2371
3346
|
clear();
|
|
3347
|
+
// `waitForReplicator()` is typically used as a precondition before join/replicate
|
|
3348
|
+
// flows. A replicator can become mature and enqueue a debounced rebalance
|
|
3349
|
+
// (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
|
|
3350
|
+
// observe a "late" rebalance after the wait resolves.
|
|
3351
|
+
await this.replicationChangeDebounceFn?.flush?.();
|
|
2372
3352
|
deferred.resolve();
|
|
2373
3353
|
};
|
|
2374
3354
|
const reject = (error) => {
|
|
@@ -2400,13 +3380,14 @@ let SharedLog = (() => {
|
|
|
2400
3380
|
requestAttempts++;
|
|
2401
3381
|
this.rpc
|
|
2402
3382
|
.send(new RequestReplicationInfoMessage(), {
|
|
2403
|
-
mode: new
|
|
3383
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [key] }),
|
|
2404
3384
|
})
|
|
2405
3385
|
.catch((e) => {
|
|
2406
3386
|
// Best-effort: missing peers / unopened RPC should not fail the wait logic.
|
|
2407
3387
|
if (isNotStartedError(e)) {
|
|
2408
3388
|
return;
|
|
2409
3389
|
}
|
|
3390
|
+
logger.error(e?.toString?.() ?? String(e));
|
|
2410
3391
|
});
|
|
2411
3392
|
if (requestAttempts < maxRequestAttempts) {
|
|
2412
3393
|
requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
|
|
@@ -2425,7 +3406,7 @@ let SharedLog = (() => {
|
|
|
2425
3406
|
return;
|
|
2426
3407
|
}
|
|
2427
3408
|
}
|
|
2428
|
-
resolve();
|
|
3409
|
+
await resolve();
|
|
2429
3410
|
}
|
|
2430
3411
|
catch (error) {
|
|
2431
3412
|
reject(error instanceof Error ? error : new Error(String(error)));
|
|
@@ -2441,12 +3422,6 @@ let SharedLog = (() => {
|
|
|
2441
3422
|
return deferred.promise.finally(clear);
|
|
2442
3423
|
}
|
|
2443
3424
|
async waitForReplicators(options) {
|
|
2444
|
-
// if no remotes, just return
|
|
2445
|
-
const subscribers = await this.node.services.pubsub.getSubscribers(this.rpc.topic);
|
|
2446
|
-
let waitForNewPeers = options?.waitForNewPeers;
|
|
2447
|
-
if (!waitForNewPeers && (subscribers?.length ?? 0) === 0) {
|
|
2448
|
-
throw new NoPeersError(this.rpc.topic);
|
|
2449
|
-
}
|
|
2450
3425
|
let coverageThreshold = options?.coverageThreshold ?? 1;
|
|
2451
3426
|
let deferred = pDefer();
|
|
2452
3427
|
let settled = false;
|
|
@@ -2529,19 +3504,33 @@ let SharedLog = (() => {
|
|
|
2529
3504
|
async _waitForReplicators(cursors, entry, waitFor, options = { timeout: this.waitForReplicatorTimeout }) {
|
|
2530
3505
|
const timeout = options.timeout ?? this.waitForReplicatorTimeout;
|
|
2531
3506
|
return new Promise((resolve, reject) => {
|
|
3507
|
+
let settled = false;
|
|
2532
3508
|
const removeListeners = () => {
|
|
2533
3509
|
this.events.removeEventListener("replication:change", roleListener);
|
|
2534
3510
|
this.events.removeEventListener("replicator:mature", roleListener); // TODO replication:change event ?
|
|
2535
3511
|
this._closeController.signal.removeEventListener("abort", abortListener);
|
|
2536
3512
|
};
|
|
2537
|
-
const
|
|
3513
|
+
const settleResolve = (value) => {
|
|
3514
|
+
if (settled)
|
|
3515
|
+
return;
|
|
3516
|
+
settled = true;
|
|
2538
3517
|
removeListeners();
|
|
2539
3518
|
clearTimeout(timer);
|
|
2540
|
-
resolve(
|
|
3519
|
+
resolve(value);
|
|
2541
3520
|
};
|
|
2542
|
-
const
|
|
3521
|
+
const settleReject = (error) => {
|
|
3522
|
+
if (settled)
|
|
3523
|
+
return;
|
|
3524
|
+
settled = true;
|
|
2543
3525
|
removeListeners();
|
|
2544
|
-
|
|
3526
|
+
clearTimeout(timer);
|
|
3527
|
+
reject(error);
|
|
3528
|
+
};
|
|
3529
|
+
const abortListener = () => {
|
|
3530
|
+
settleResolve(false);
|
|
3531
|
+
};
|
|
3532
|
+
const timer = setTimeout(async () => {
|
|
3533
|
+
settleResolve(false);
|
|
2545
3534
|
}, timeout);
|
|
2546
3535
|
const check = async () => {
|
|
2547
3536
|
let leaderKeys = new Set();
|
|
@@ -2561,17 +3550,20 @@ let SharedLog = (() => {
|
|
|
2561
3550
|
}
|
|
2562
3551
|
}
|
|
2563
3552
|
options?.onLeader && leaderKeys.forEach(options.onLeader);
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
3553
|
+
settleResolve(leaders);
|
|
3554
|
+
};
|
|
3555
|
+
const runCheck = () => {
|
|
3556
|
+
void check().catch((error) => {
|
|
3557
|
+
settleReject(error);
|
|
3558
|
+
});
|
|
2567
3559
|
};
|
|
2568
3560
|
const roleListener = () => {
|
|
2569
|
-
|
|
3561
|
+
runCheck();
|
|
2570
3562
|
};
|
|
2571
3563
|
this.events.addEventListener("replication:change", roleListener); // TODO replication:change event ?
|
|
2572
3564
|
this.events.addEventListener("replicator:mature", roleListener); // TODO replication:change event ?
|
|
2573
3565
|
this._closeController.signal.addEventListener("abort", abortListener);
|
|
2574
|
-
|
|
3566
|
+
runCheck();
|
|
2575
3567
|
});
|
|
2576
3568
|
}
|
|
2577
3569
|
async createCoordinates(entry, minReplicas) {
|
|
@@ -2628,9 +3620,7 @@ let SharedLog = (() => {
|
|
|
2628
3620
|
let subscribers = 1;
|
|
2629
3621
|
if (!this.rpc.closed) {
|
|
2630
3622
|
try {
|
|
2631
|
-
subscribers =
|
|
2632
|
-
(await this.node.services.pubsub.getSubscribers(this.rpc.topic))
|
|
2633
|
-
?.length ?? 1;
|
|
3623
|
+
subscribers = (await this._getTopicSubscribers(this.rpc.topic))?.length ?? 1;
|
|
2634
3624
|
}
|
|
2635
3625
|
catch {
|
|
2636
3626
|
// Best-effort only; fall back to 1.
|
|
@@ -2697,22 +3687,48 @@ let SharedLog = (() => {
|
|
|
2697
3687
|
async _findLeaders(cursors, options) {
|
|
2698
3688
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 is added so that if someone else is just as new as us, then we treat them as being as mature as us. Without -500 we might be slower syncing if two nodes start at almost the same time
|
|
2699
3689
|
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2700
|
-
//
|
|
2701
|
-
//
|
|
2702
|
-
//
|
|
3690
|
+
// Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
|
|
3691
|
+
// If it is still warming up (for example, only contains self), supplement with
|
|
3692
|
+
// current subscribers until we have enough candidates for this decision.
|
|
2703
3693
|
let peerFilter = undefined;
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
3694
|
+
const selfReplicating = await this.isReplicating();
|
|
3695
|
+
if (this.uniqueReplicators.size > 0) {
|
|
3696
|
+
peerFilter = new Set(this.uniqueReplicators);
|
|
3697
|
+
if (selfReplicating) {
|
|
3698
|
+
peerFilter.add(selfHash);
|
|
3699
|
+
}
|
|
3700
|
+
else {
|
|
3701
|
+
peerFilter.delete(selfHash);
|
|
3702
|
+
}
|
|
3703
|
+
try {
|
|
3704
|
+
const subscribers = await this._getTopicSubscribers(this.topic);
|
|
3705
|
+
if (subscribers && subscribers.length > 0) {
|
|
3706
|
+
for (const subscriber of subscribers) {
|
|
3707
|
+
peerFilter.add(subscriber.hashcode());
|
|
3708
|
+
}
|
|
3709
|
+
if (selfReplicating) {
|
|
3710
|
+
peerFilter.add(selfHash);
|
|
3711
|
+
}
|
|
3712
|
+
else {
|
|
3713
|
+
peerFilter.delete(selfHash);
|
|
3714
|
+
}
|
|
3715
|
+
}
|
|
3716
|
+
}
|
|
3717
|
+
catch {
|
|
3718
|
+
// Best-effort only; keep current peerFilter.
|
|
3719
|
+
}
|
|
2708
3720
|
}
|
|
2709
3721
|
else {
|
|
2710
3722
|
try {
|
|
2711
|
-
const subscribers = (await this.
|
|
2712
|
-
undefined;
|
|
3723
|
+
const subscribers = (await this._getTopicSubscribers(this.topic)) ?? undefined;
|
|
2713
3724
|
if (subscribers && subscribers.length > 0) {
|
|
2714
3725
|
peerFilter = new Set(subscribers.map((key) => key.hashcode()));
|
|
2715
|
-
|
|
3726
|
+
if (selfReplicating) {
|
|
3727
|
+
peerFilter.add(selfHash);
|
|
3728
|
+
}
|
|
3729
|
+
else {
|
|
3730
|
+
peerFilter.delete(selfHash);
|
|
3731
|
+
}
|
|
2716
3732
|
}
|
|
2717
3733
|
}
|
|
2718
3734
|
catch {
|
|
@@ -2735,28 +3751,110 @@ let SharedLog = (() => {
|
|
|
2735
3751
|
replicas: maxReplicas(this, [entry]),
|
|
2736
3752
|
}, options);
|
|
2737
3753
|
}
|
|
3754
|
+
withReplicationInfoApplyQueue(peerHash, fn) {
|
|
3755
|
+
const prev = this._replicationInfoApplyQueueByPeer.get(peerHash);
|
|
3756
|
+
const next = (prev ?? Promise.resolve())
|
|
3757
|
+
.catch(() => {
|
|
3758
|
+
// Avoid stuck queues if a previous apply failed.
|
|
3759
|
+
})
|
|
3760
|
+
.then(fn);
|
|
3761
|
+
this._replicationInfoApplyQueueByPeer.set(peerHash, next);
|
|
3762
|
+
return next.finally(() => {
|
|
3763
|
+
if (this._replicationInfoApplyQueueByPeer.get(peerHash) === next) {
|
|
3764
|
+
this._replicationInfoApplyQueueByPeer.delete(peerHash);
|
|
3765
|
+
}
|
|
3766
|
+
});
|
|
3767
|
+
}
|
|
3768
|
+
cancelReplicationInfoRequests(peerHash) {
|
|
3769
|
+
const state = this._replicationInfoRequestByPeer.get(peerHash);
|
|
3770
|
+
if (!state)
|
|
3771
|
+
return;
|
|
3772
|
+
if (state.timer) {
|
|
3773
|
+
clearTimeout(state.timer);
|
|
3774
|
+
}
|
|
3775
|
+
this._replicationInfoRequestByPeer.delete(peerHash);
|
|
3776
|
+
}
|
|
3777
|
+
scheduleReplicationInfoRequests(peer) {
|
|
3778
|
+
const peerHash = peer.hashcode();
|
|
3779
|
+
if (this._replicationInfoRequestByPeer.has(peerHash)) {
|
|
3780
|
+
return;
|
|
3781
|
+
}
|
|
3782
|
+
const state = {
|
|
3783
|
+
attempts: 0,
|
|
3784
|
+
};
|
|
3785
|
+
this._replicationInfoRequestByPeer.set(peerHash, state);
|
|
3786
|
+
const intervalMs = Math.max(50, this.waitForReplicatorRequestIntervalMs);
|
|
3787
|
+
const maxAttempts = Math.min(5, this.waitForReplicatorRequestMaxAttempts ??
|
|
3788
|
+
WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS);
|
|
3789
|
+
const tick = () => {
|
|
3790
|
+
if (this.closed || this._closeController.signal.aborted) {
|
|
3791
|
+
this.cancelReplicationInfoRequests(peerHash);
|
|
3792
|
+
return;
|
|
3793
|
+
}
|
|
3794
|
+
state.attempts++;
|
|
3795
|
+
this.rpc
|
|
3796
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3797
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [peer] }),
|
|
3798
|
+
})
|
|
3799
|
+
.catch((e) => {
|
|
3800
|
+
// Best-effort: missing peers / unopened RPC should not fail join flows.
|
|
3801
|
+
if (isNotStartedError(e)) {
|
|
3802
|
+
return;
|
|
3803
|
+
}
|
|
3804
|
+
logger.error(e?.toString?.() ?? String(e));
|
|
3805
|
+
});
|
|
3806
|
+
if (state.attempts >= maxAttempts) {
|
|
3807
|
+
this.cancelReplicationInfoRequests(peerHash);
|
|
3808
|
+
return;
|
|
3809
|
+
}
|
|
3810
|
+
state.timer = setTimeout(tick, intervalMs);
|
|
3811
|
+
state.timer.unref?.();
|
|
3812
|
+
};
|
|
3813
|
+
tick();
|
|
3814
|
+
}
|
|
2738
3815
|
async handleSubscriptionChange(publicKey, topics, subscribed) {
|
|
2739
3816
|
if (!topics.includes(this.topic)) {
|
|
2740
3817
|
return;
|
|
2741
3818
|
}
|
|
3819
|
+
const peerHash = publicKey.hashcode();
|
|
3820
|
+
if (subscribed) {
|
|
3821
|
+
this._replicationInfoBlockedPeers.delete(peerHash);
|
|
3822
|
+
}
|
|
3823
|
+
else {
|
|
3824
|
+
this._replicationInfoBlockedPeers.add(peerHash);
|
|
3825
|
+
}
|
|
2742
3826
|
if (!subscribed) {
|
|
2743
|
-
this.
|
|
3827
|
+
const wasReplicator = this.uniqueReplicators.has(peerHash);
|
|
3828
|
+
try {
|
|
3829
|
+
// Unsubscribe can race with the peer's final replication reset message.
|
|
3830
|
+
// Proactively evict its ranges so leader selection doesn't keep stale owners.
|
|
3831
|
+
await this.removeReplicator(publicKey, { noEvent: true });
|
|
3832
|
+
}
|
|
3833
|
+
catch (error) {
|
|
3834
|
+
if (!isNotStartedError(error)) {
|
|
3835
|
+
throw error;
|
|
3836
|
+
}
|
|
3837
|
+
}
|
|
3838
|
+
// Emit replicator:leave at most once per (join -> leave) transition, even if we
|
|
3839
|
+
// concurrently process unsubscribe + replication reset messages for the same peer.
|
|
3840
|
+
const stoppedTransition = wasReplicator;
|
|
3841
|
+
this._replicatorJoinEmitted.delete(peerHash);
|
|
3842
|
+
this.cancelReplicationInfoRequests(peerHash);
|
|
3843
|
+
this.removePeerFromGidPeerHistory(peerHash);
|
|
2744
3844
|
for (const [k, v] of this._requestIPruneSent) {
|
|
2745
|
-
v.delete(
|
|
3845
|
+
v.delete(peerHash);
|
|
2746
3846
|
if (v.size === 0) {
|
|
2747
3847
|
this._requestIPruneSent.delete(k);
|
|
2748
3848
|
}
|
|
2749
3849
|
}
|
|
2750
3850
|
for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
|
|
2751
|
-
v.delete(
|
|
3851
|
+
v.delete(peerHash);
|
|
2752
3852
|
if (v.size === 0) {
|
|
2753
3853
|
this._requestIPruneResponseReplicatorSet.delete(k);
|
|
2754
3854
|
}
|
|
2755
3855
|
}
|
|
2756
3856
|
this.syncronizer.onPeerDisconnected(publicKey);
|
|
2757
|
-
|
|
2758
|
-
query: { hash: publicKey.hashcode() },
|
|
2759
|
-
})) > 0 &&
|
|
3857
|
+
stoppedTransition &&
|
|
2760
3858
|
this.events.dispatchEvent(new CustomEvent("replicator:leave", {
|
|
2761
3859
|
detail: { publicKey },
|
|
2762
3860
|
}));
|
|
@@ -2768,14 +3866,14 @@ let SharedLog = (() => {
|
|
|
2768
3866
|
.send(new AllReplicatingSegmentsMessage({
|
|
2769
3867
|
segments: replicationSegments.map((x) => x.toReplicationRange()),
|
|
2770
3868
|
}), {
|
|
2771
|
-
mode: new
|
|
3869
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
|
|
2772
3870
|
})
|
|
2773
3871
|
.catch((e) => logger.error(e.toString()));
|
|
2774
3872
|
if (this.v8Behaviour) {
|
|
2775
3873
|
// for backwards compatibility
|
|
2776
3874
|
this.rpc
|
|
2777
3875
|
.send(new ResponseRoleMessage({ role: await this.getRole() }), {
|
|
2778
|
-
mode: new
|
|
3876
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
|
|
2779
3877
|
})
|
|
2780
3878
|
.catch((e) => logger.error(e.toString()));
|
|
2781
3879
|
}
|
|
@@ -2783,11 +3881,7 @@ let SharedLog = (() => {
|
|
|
2783
3881
|
// Request the remote peer's replication info. This makes joins resilient to
|
|
2784
3882
|
// timing-sensitive delivery/order issues where we may miss their initial
|
|
2785
3883
|
// replication announcement.
|
|
2786
|
-
this.
|
|
2787
|
-
.send(new RequestReplicationInfoMessage(), {
|
|
2788
|
-
mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
|
|
2789
|
-
})
|
|
2790
|
-
.catch((e) => logger.error(e.toString()));
|
|
3884
|
+
this.scheduleReplicationInfoRequests(publicKey);
|
|
2791
3885
|
}
|
|
2792
3886
|
else {
|
|
2793
3887
|
await this.removeReplicator(publicKey);
|
|
@@ -2841,6 +3935,7 @@ let SharedLog = (() => {
|
|
|
2841
3935
|
const promises = [];
|
|
2842
3936
|
let peerToEntries = new Map();
|
|
2843
3937
|
let cleanupTimer = [];
|
|
3938
|
+
const explicitTimeout = options?.timeout != null;
|
|
2844
3939
|
for (const { entry, leaders } of entries.values()) {
|
|
2845
3940
|
for (const leader of leaders.keys()) {
|
|
2846
3941
|
let set = peerToEntries.get(leader);
|
|
@@ -2852,7 +3947,28 @@ let SharedLog = (() => {
|
|
|
2852
3947
|
}
|
|
2853
3948
|
const pendingPrev = this._pendingDeletes.get(entry.hash);
|
|
2854
3949
|
if (pendingPrev) {
|
|
2855
|
-
|
|
3950
|
+
// If a background prune is already in-flight, an explicit prune request should
|
|
3951
|
+
// still respect the caller's timeout. Otherwise, tests (and user calls) can
|
|
3952
|
+
// block on the longer "checked prune" timeout derived from
|
|
3953
|
+
// `_respondToIHaveTimeout + waitForReplicatorTimeout`, which is intentionally
|
|
3954
|
+
// large for resiliency.
|
|
3955
|
+
if (explicitTimeout) {
|
|
3956
|
+
const timeoutMs = Math.max(0, Math.floor(options?.timeout ?? 0));
|
|
3957
|
+
promises.push(new Promise((resolve, reject) => {
|
|
3958
|
+
// Mirror the checked-prune error prefix so existing callers/tests can
|
|
3959
|
+
// match on the message substring.
|
|
3960
|
+
const timer = setTimeout(() => {
|
|
3961
|
+
reject(new Error(`Timeout for checked pruning after ${timeoutMs}ms (pending=true closed=${this.closed})`));
|
|
3962
|
+
}, timeoutMs);
|
|
3963
|
+
timer.unref?.();
|
|
3964
|
+
pendingPrev.promise.promise
|
|
3965
|
+
.then(resolve, reject)
|
|
3966
|
+
.finally(() => clearTimeout(timer));
|
|
3967
|
+
}));
|
|
3968
|
+
}
|
|
3969
|
+
else {
|
|
3970
|
+
promises.push(pendingPrev.promise.promise);
|
|
3971
|
+
}
|
|
2856
3972
|
continue;
|
|
2857
3973
|
}
|
|
2858
3974
|
const minReplicas = decodeReplicas(entry);
|
|
@@ -2866,6 +3982,7 @@ let SharedLog = (() => {
|
|
|
2866
3982
|
};
|
|
2867
3983
|
const resolve = () => {
|
|
2868
3984
|
clear();
|
|
3985
|
+
this.clearCheckedPruneRetry(entry.hash);
|
|
2869
3986
|
cleanupTimer.push(setTimeout(async () => {
|
|
2870
3987
|
this._gidPeersHistory.delete(entry.meta.gid);
|
|
2871
3988
|
this.removePruneRequestSent(entry.hash);
|
|
@@ -2903,6 +4020,12 @@ let SharedLog = (() => {
|
|
|
2903
4020
|
};
|
|
2904
4021
|
const reject = (e) => {
|
|
2905
4022
|
clear();
|
|
4023
|
+
const isCheckedPruneTimeout = e instanceof Error &&
|
|
4024
|
+
typeof e.message === "string" &&
|
|
4025
|
+
e.message.startsWith("Timeout for checked pruning");
|
|
4026
|
+
if (explicitTimeout || !isCheckedPruneTimeout) {
|
|
4027
|
+
this.clearCheckedPruneRetry(entry.hash);
|
|
4028
|
+
}
|
|
2906
4029
|
this.removePruneRequestSent(entry.hash);
|
|
2907
4030
|
this._requestIPruneResponseReplicatorSet.delete(entry.hash);
|
|
2908
4031
|
deferredPromise.reject(e);
|
|
@@ -2918,6 +4041,12 @@ let SharedLog = (() => {
|
|
|
2918
4041
|
this.waitForReplicatorTimeout +
|
|
2919
4042
|
PRUNE_DEBOUNCE_INTERVAL * 2);
|
|
2920
4043
|
const timeout = setTimeout(() => {
|
|
4044
|
+
// For internal/background prune flows (no explicit timeout), retry a few times
|
|
4045
|
+
// to avoid "permanently prunable" entries when `_pendingIHave` expires under
|
|
4046
|
+
// heavy load.
|
|
4047
|
+
if (!explicitTimeout) {
|
|
4048
|
+
this.scheduleCheckedPruneRetry({ entry, leaders });
|
|
4049
|
+
}
|
|
2921
4050
|
reject(new Error(`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`));
|
|
2922
4051
|
}, checkedPruneTimeoutMs);
|
|
2923
4052
|
timeout.unref?.();
|
|
@@ -2947,6 +4076,8 @@ let SharedLog = (() => {
|
|
|
2947
4076
|
this._requestIPruneResponseReplicatorSet.set(entry.hash, existCounter);
|
|
2948
4077
|
}
|
|
2949
4078
|
existCounter.add(publicKeyHash);
|
|
4079
|
+
// Seed provider hints so future remote reads can avoid extra round-trips.
|
|
4080
|
+
this.remoteBlocks.hintProviders(entry.hash, [publicKeyHash]);
|
|
2950
4081
|
if (minReplicasValue <= existCounter.size) {
|
|
2951
4082
|
resolve();
|
|
2952
4083
|
}
|
|
@@ -2984,6 +4115,37 @@ let SharedLog = (() => {
|
|
|
2984
4115
|
for (const [k, v] of peerToEntries) {
|
|
2985
4116
|
emitMessages(v, k);
|
|
2986
4117
|
}
|
|
4118
|
+
// Keep remote `_pendingIHave` alive in the common "leader doesn't have entry yet"
|
|
4119
|
+
// case. This is intentionally disabled when an explicit timeout is provided to
|
|
4120
|
+
// preserve unit tests that assert remote `_pendingIHave` clears promptly.
|
|
4121
|
+
if (!explicitTimeout && peerToEntries.size > 0) {
|
|
4122
|
+
const respondToIHaveTimeout = Number(this._respondToIHaveTimeout ?? 0);
|
|
4123
|
+
const resendIntervalMs = Math.min(CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS, Math.max(CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS, Math.floor(respondToIHaveTimeout / 2) || 1_000));
|
|
4124
|
+
let inFlight = false;
|
|
4125
|
+
const timer = setInterval(() => {
|
|
4126
|
+
if (inFlight)
|
|
4127
|
+
return;
|
|
4128
|
+
if (this.closed)
|
|
4129
|
+
return;
|
|
4130
|
+
const pendingByPeer = [];
|
|
4131
|
+
for (const [peer, hashes] of peerToEntries) {
|
|
4132
|
+
const pending = hashes.filter((h) => this._pendingDeletes.has(h));
|
|
4133
|
+
if (pending.length > 0) {
|
|
4134
|
+
pendingByPeer.push([peer, pending]);
|
|
4135
|
+
}
|
|
4136
|
+
}
|
|
4137
|
+
if (pendingByPeer.length === 0) {
|
|
4138
|
+
clearInterval(timer);
|
|
4139
|
+
return;
|
|
4140
|
+
}
|
|
4141
|
+
inFlight = true;
|
|
4142
|
+
Promise.allSettled(pendingByPeer.map(([peer, hashes]) => emitMessages(hashes, peer).catch(() => { }))).finally(() => {
|
|
4143
|
+
inFlight = false;
|
|
4144
|
+
});
|
|
4145
|
+
}, resendIntervalMs);
|
|
4146
|
+
timer.unref?.();
|
|
4147
|
+
cleanupTimer.push(timer);
|
|
4148
|
+
}
|
|
2987
4149
|
let cleanup = () => {
|
|
2988
4150
|
for (const timer of cleanupTimer) {
|
|
2989
4151
|
clearTimeout(timer);
|
|
@@ -3040,14 +4202,96 @@ let SharedLog = (() => {
|
|
|
3040
4202
|
return;
|
|
3041
4203
|
}
|
|
3042
4204
|
await this.log.trim();
|
|
4205
|
+
const batchedChanges = Array.isArray(changeOrChanges[0])
|
|
4206
|
+
? changeOrChanges
|
|
4207
|
+
: [changeOrChanges];
|
|
4208
|
+
const changes = batchedChanges.flat();
|
|
4209
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
4210
|
+
// On removed ranges (peer leaves / shrink), gid-level history can hide
|
|
4211
|
+
// per-entry gaps. Force a fresh delivery pass for reassigned entries.
|
|
4212
|
+
const forceFreshDelivery = changes.some((change) => change.type === "removed" && change.range.hash !== selfHash);
|
|
4213
|
+
const gidPeersHistorySnapshot = new Map();
|
|
4214
|
+
const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
|
|
4215
|
+
for (const [target, hashes] of this._recentRepairDispatch) {
|
|
4216
|
+
for (const [hash, ts] of hashes) {
|
|
4217
|
+
if (ts <= dedupeCutoff) {
|
|
4218
|
+
hashes.delete(hash);
|
|
4219
|
+
}
|
|
4220
|
+
}
|
|
4221
|
+
if (hashes.size === 0) {
|
|
4222
|
+
this._recentRepairDispatch.delete(target);
|
|
4223
|
+
}
|
|
4224
|
+
}
|
|
3043
4225
|
const changed = false;
|
|
4226
|
+
const replacedPeers = new Set();
|
|
4227
|
+
for (const change of changes) {
|
|
4228
|
+
if (change.type === "replaced" && change.range.hash !== selfHash) {
|
|
4229
|
+
replacedPeers.add(change.range.hash);
|
|
4230
|
+
}
|
|
4231
|
+
}
|
|
4232
|
+
const addedPeers = new Set();
|
|
4233
|
+
for (const change of changes) {
|
|
4234
|
+
if (change.type === "added" || change.type === "replaced") {
|
|
4235
|
+
const hash = change.range.hash;
|
|
4236
|
+
if (hash !== selfHash) {
|
|
4237
|
+
// Range updates can reassign entries to an existing peer shortly after it
|
|
4238
|
+
// already received a subset. Avoid suppressing legitimate follow-up repair.
|
|
4239
|
+
this._recentRepairDispatch.delete(hash);
|
|
4240
|
+
}
|
|
4241
|
+
}
|
|
4242
|
+
if (change.type === "added") {
|
|
4243
|
+
const hash = change.range.hash;
|
|
4244
|
+
if (hash !== selfHash && !replacedPeers.has(hash)) {
|
|
4245
|
+
addedPeers.add(hash);
|
|
4246
|
+
}
|
|
4247
|
+
}
|
|
4248
|
+
}
|
|
3044
4249
|
try {
|
|
3045
4250
|
const uncheckedDeliver = new Map();
|
|
3046
|
-
|
|
4251
|
+
const flushUncheckedDeliverTarget = (target) => {
|
|
4252
|
+
const entries = uncheckedDeliver.get(target);
|
|
4253
|
+
if (!entries || entries.size === 0) {
|
|
4254
|
+
return;
|
|
4255
|
+
}
|
|
4256
|
+
const isJoinWarmupTarget = addedPeers.has(target);
|
|
4257
|
+
const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
|
|
4258
|
+
this.dispatchMaybeMissingEntries(target, entries, {
|
|
4259
|
+
bypassRecentDedupe,
|
|
4260
|
+
retryScheduleMs: isJoinWarmupTarget
|
|
4261
|
+
? JOIN_WARMUP_RETRY_SCHEDULE_MS
|
|
4262
|
+
: undefined,
|
|
4263
|
+
forceFreshDelivery,
|
|
4264
|
+
});
|
|
4265
|
+
uncheckedDeliver.delete(target);
|
|
4266
|
+
};
|
|
4267
|
+
const queueUncheckedDeliver = (target, entry) => {
|
|
4268
|
+
let set = uncheckedDeliver.get(target);
|
|
4269
|
+
if (!set) {
|
|
4270
|
+
set = new Map();
|
|
4271
|
+
uncheckedDeliver.set(target, set);
|
|
4272
|
+
}
|
|
4273
|
+
if (set.has(entry.hash)) {
|
|
4274
|
+
return;
|
|
4275
|
+
}
|
|
4276
|
+
set.set(entry.hash, entry);
|
|
4277
|
+
if (set.size >= this.repairSweepTargetBufferSize) {
|
|
4278
|
+
flushUncheckedDeliverTarget(target);
|
|
4279
|
+
}
|
|
4280
|
+
};
|
|
4281
|
+
for await (const entryReplicated of toRebalance(changes, this.entryCoordinatesIndex, this.recentlyRebalanced, { forceFresh: forceFreshDelivery })) {
|
|
3047
4282
|
if (this.closed) {
|
|
3048
4283
|
break;
|
|
3049
4284
|
}
|
|
3050
|
-
let oldPeersSet
|
|
4285
|
+
let oldPeersSet;
|
|
4286
|
+
if (!forceFreshDelivery) {
|
|
4287
|
+
const gid = entryReplicated.gid;
|
|
4288
|
+
oldPeersSet = gidPeersHistorySnapshot.get(gid);
|
|
4289
|
+
if (!gidPeersHistorySnapshot.has(gid)) {
|
|
4290
|
+
const existing = this._gidPeersHistory.get(gid);
|
|
4291
|
+
oldPeersSet = existing ? new Set(existing) : undefined;
|
|
4292
|
+
gidPeersHistorySnapshot.set(gid, oldPeersSet);
|
|
4293
|
+
}
|
|
4294
|
+
}
|
|
3051
4295
|
let isLeader = false;
|
|
3052
4296
|
let currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, {
|
|
3053
4297
|
// we do this to make sure new replicators get data even though they are not mature so they can figure out if they want to replicate more or less
|
|
@@ -3060,14 +4304,7 @@ let SharedLog = (() => {
|
|
|
3060
4304
|
continue;
|
|
3061
4305
|
}
|
|
3062
4306
|
if (!oldPeersSet?.has(currentPeer)) {
|
|
3063
|
-
|
|
3064
|
-
if (!set) {
|
|
3065
|
-
set = new Map();
|
|
3066
|
-
uncheckedDeliver.set(currentPeer, set);
|
|
3067
|
-
}
|
|
3068
|
-
if (!set.has(entryReplicated.hash)) {
|
|
3069
|
-
set.set(entryReplicated.hash, entryReplicated);
|
|
3070
|
-
}
|
|
4307
|
+
queueUncheckedDeliver(currentPeer, entryReplicated);
|
|
3071
4308
|
}
|
|
3072
4309
|
}
|
|
3073
4310
|
if (oldPeersSet) {
|
|
@@ -3093,11 +4330,13 @@ let SharedLog = (() => {
|
|
|
3093
4330
|
this.removePruneRequestSent(entryReplicated.hash);
|
|
3094
4331
|
}
|
|
3095
4332
|
}
|
|
3096
|
-
|
|
3097
|
-
|
|
3098
|
-
|
|
3099
|
-
|
|
3100
|
-
|
|
4333
|
+
if (forceFreshDelivery || addedPeers.size > 0) {
|
|
4334
|
+
// Schedule a coalesced background sweep for churn/join windows instead of
|
|
4335
|
+
// scanning the whole index synchronously on each replication change.
|
|
4336
|
+
this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
|
|
4337
|
+
}
|
|
4338
|
+
for (const target of [...uncheckedDeliver.keys()]) {
|
|
4339
|
+
flushUncheckedDeliverTarget(target);
|
|
3101
4340
|
}
|
|
3102
4341
|
return changed;
|
|
3103
4342
|
}
|
|
@@ -3111,13 +4350,30 @@ let SharedLog = (() => {
|
|
|
3111
4350
|
}
|
|
3112
4351
|
async _onUnsubscription(evt) {
|
|
3113
4352
|
logger.trace(`Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(evt.detail.topics.map((x) => x))} '`);
|
|
3114
|
-
|
|
4353
|
+
if (!evt.detail.topics.includes(this.topic)) {
|
|
4354
|
+
return;
|
|
4355
|
+
}
|
|
4356
|
+
const fromHash = evt.detail.from.hashcode();
|
|
4357
|
+
this._replicationInfoBlockedPeers.add(fromHash);
|
|
4358
|
+
this._recentRepairDispatch.delete(fromHash);
|
|
4359
|
+
// Keep a per-peer timestamp watermark when we observe an unsubscribe. This
|
|
4360
|
+
// prevents late/out-of-order replication-info messages from re-introducing
|
|
4361
|
+
// stale segments for a peer that has already left the topic.
|
|
4362
|
+
const now = BigInt(+new Date());
|
|
4363
|
+
const prev = this.latestReplicationInfoMessage.get(fromHash);
|
|
4364
|
+
if (!prev || prev < now) {
|
|
4365
|
+
this.latestReplicationInfoMessage.set(fromHash, now);
|
|
4366
|
+
}
|
|
3115
4367
|
return this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, false);
|
|
3116
4368
|
}
|
|
3117
4369
|
async _onSubscription(evt) {
|
|
3118
4370
|
logger.trace(`New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(evt.detail.topics.map((x) => x))}'`);
|
|
4371
|
+
if (!evt.detail.topics.includes(this.topic)) {
|
|
4372
|
+
return;
|
|
4373
|
+
}
|
|
3119
4374
|
this.remoteBlocks.onReachable(evt.detail.from);
|
|
3120
|
-
|
|
4375
|
+
this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
|
|
4376
|
+
await this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, true);
|
|
3121
4377
|
}
|
|
3122
4378
|
async rebalanceParticipation() {
|
|
3123
4379
|
// update more participation rate to converge to the average expected rate or bounded by
|