@peerbit/shared-log 12.3.4 → 12.3.5-3f16953
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/fanout-envelope.d.ts +18 -0
- package/dist/src/fanout-envelope.d.ts.map +1 -0
- package/dist/src/fanout-envelope.js +85 -0
- package/dist/src/fanout-envelope.js.map +1 -0
- package/dist/src/index.d.ts +41 -3
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1217 -326
- package/dist/src/index.js.map +1 -1
- package/dist/src/pid.d.ts.map +1 -1
- package/dist/src/pid.js +21 -5
- package/dist/src/pid.js.map +1 -1
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +7 -3
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +42 -3
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/package.json +20 -20
- package/src/fanout-envelope.ts +27 -0
- package/src/index.ts +1734 -698
- package/src/pid.ts +22 -4
- package/src/ranges.ts +7 -3
- package/src/sync/rateless-iblt.ts +58 -3
package/dist/src/index.js
CHANGED
|
@@ -32,19 +32,19 @@ var __runInitializers = (this && this.__runInitializers) || function (thisArg, i
|
|
|
32
32
|
}
|
|
33
33
|
return useValue ? value : void 0;
|
|
34
34
|
};
|
|
35
|
-
import { BorshError, field, variant } from "@dao-xyz/borsh";
|
|
35
|
+
import { BorshError, deserialize, field, serialize, variant } from "@dao-xyz/borsh";
|
|
36
36
|
import { AnyBlockStore, RemoteBlocks } from "@peerbit/blocks";
|
|
37
37
|
import { cidifyString } from "@peerbit/blocks-interface";
|
|
38
38
|
import { Cache } from "@peerbit/cache";
|
|
39
|
-
import { AccessError, PublicSignKey, sha256Base64Sync, sha256Sync, } from "@peerbit/crypto";
|
|
39
|
+
import { AccessError, PublicSignKey, getPublicKeyFromPeerId, sha256Base64Sync, sha256Sync, } from "@peerbit/crypto";
|
|
40
40
|
import { And, ByteMatchQuery, NotStartedError as IndexNotStartedError, Or, Sort, StringMatch, toId, } from "@peerbit/indexer-interface";
|
|
41
41
|
import { Entry, Log, Meta, ShallowEntry, } from "@peerbit/log";
|
|
42
42
|
import { logger as loggerFn } from "@peerbit/logger";
|
|
43
43
|
import { ClosedError, Program } from "@peerbit/program";
|
|
44
|
-
import { waitForSubscribers } from "@peerbit/pubsub";
|
|
44
|
+
import { FanoutChannel, waitForSubscribers, } from "@peerbit/pubsub";
|
|
45
45
|
import { SubscriptionEvent, UnsubcriptionEvent, } from "@peerbit/pubsub-interface";
|
|
46
46
|
import { RPC } from "@peerbit/rpc";
|
|
47
|
-
import { AcknowledgeDelivery, AnyWhere,
|
|
47
|
+
import { AcknowledgeDelivery, AnyWhere, DataMessage, MessageHeader, NotStartedError, SilentDelivery, } from "@peerbit/stream-interface";
|
|
48
48
|
import { AbortError, TimeoutError, debounceAccumulator, debounceFixedInterval, waitFor, } from "@peerbit/time";
|
|
49
49
|
import pDefer, {} from "p-defer";
|
|
50
50
|
import PQueue from "p-queue";
|
|
@@ -54,6 +54,7 @@ import { CPUUsageIntervalLag } from "./cpu.js";
|
|
|
54
54
|
import { debouncedAccumulatorMap, } from "./debounce.js";
|
|
55
55
|
import { NoPeersError } from "./errors.js";
|
|
56
56
|
import { EntryWithRefs, ExchangeHeadsMessage, RequestIPrune, ResponseIPrune, createExchangeHeadsMessages, } from "./exchange-heads.js";
|
|
57
|
+
import { FanoutEnvelope } from "./fanout-envelope.js";
|
|
57
58
|
import { MAX_U32, MAX_U64, bytesToNumber, createNumbers, denormalizer, } from "./integers.js";
|
|
58
59
|
import { TransportMessage } from "./message.js";
|
|
59
60
|
import { PIDReplicationController } from "./pid.js";
|
|
@@ -84,6 +85,34 @@ const getLatestEntry = (entries) => {
|
|
|
84
85
|
}
|
|
85
86
|
return latest;
|
|
86
87
|
};
|
|
88
|
+
const hashToSeed32 = (str) => {
|
|
89
|
+
// FNV-1a 32-bit, fast and deterministic.
|
|
90
|
+
let hash = 0x811c9dc5;
|
|
91
|
+
for (let i = 0; i < str.length; i++) {
|
|
92
|
+
hash ^= str.charCodeAt(i);
|
|
93
|
+
hash = Math.imul(hash, 0x01000193);
|
|
94
|
+
}
|
|
95
|
+
return hash >>> 0;
|
|
96
|
+
};
|
|
97
|
+
const pickDeterministicSubset = (peers, seed, max) => {
|
|
98
|
+
if (peers.length <= max)
|
|
99
|
+
return peers;
|
|
100
|
+
const subset = [];
|
|
101
|
+
const used = new Set();
|
|
102
|
+
let x = seed || 1;
|
|
103
|
+
while (subset.length < max) {
|
|
104
|
+
// xorshift32
|
|
105
|
+
x ^= x << 13;
|
|
106
|
+
x ^= x >>> 17;
|
|
107
|
+
x ^= x << 5;
|
|
108
|
+
const peer = peers[(x >>> 0) % peers.length];
|
|
109
|
+
if (!used.has(peer)) {
|
|
110
|
+
used.add(peer);
|
|
111
|
+
subset.push(peer);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
return subset;
|
|
115
|
+
};
|
|
87
116
|
export { BlocksMessage };
|
|
88
117
|
const isAdaptiveReplicatorOption = (options) => {
|
|
89
118
|
if (typeof options === "number") {
|
|
@@ -176,6 +205,10 @@ export const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;
|
|
|
176
205
|
// Prefer making pruning robust without timing-based heuristics.
|
|
177
206
|
export const WAIT_FOR_PRUNE_DELAY = 0;
|
|
178
207
|
const PRUNE_DEBOUNCE_INTERVAL = 500;
|
|
208
|
+
const CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS = 250;
|
|
209
|
+
const CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS = 5_000;
|
|
210
|
+
const CHECKED_PRUNE_RETRY_MAX_ATTEMPTS = 3;
|
|
211
|
+
const CHECKED_PRUNE_RETRY_MAX_DELAY_MS = 30_000;
|
|
179
212
|
// DONT SET THIS ANY LOWER, because it will make the pid controller unstable as the system responses are not fast enough to updates from the pid controller
|
|
180
213
|
const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
|
|
181
214
|
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
|
|
@@ -183,6 +216,13 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
|
|
|
183
216
|
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
|
|
184
217
|
const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
|
|
185
218
|
const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
|
|
219
|
+
const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS = {
|
|
220
|
+
msgRate: 30,
|
|
221
|
+
msgSize: 1024,
|
|
222
|
+
uploadLimitBps: 5_000_000,
|
|
223
|
+
maxChildren: 24,
|
|
224
|
+
repair: true,
|
|
225
|
+
};
|
|
186
226
|
const getIdForDynamicRange = (publicKey) => {
|
|
187
227
|
return sha256Sync(concat([publicKey.bytes, new TextEncoder().encode("dynamic")]));
|
|
188
228
|
};
|
|
@@ -226,12 +266,17 @@ let SharedLog = (() => {
|
|
|
226
266
|
coordinateToHash;
|
|
227
267
|
recentlyRebalanced;
|
|
228
268
|
uniqueReplicators;
|
|
269
|
+
_replicatorJoinEmitted;
|
|
229
270
|
_replicatorsReconciled;
|
|
230
271
|
/* private _totalParticipation!: number; */
|
|
231
272
|
// gid -> coordinate -> publicKeyHash list (of owners)
|
|
232
273
|
_gidPeersHistory;
|
|
233
274
|
_onSubscriptionFn;
|
|
234
275
|
_onUnsubscriptionFn;
|
|
276
|
+
_onFanoutDataFn;
|
|
277
|
+
_onFanoutUnicastFn;
|
|
278
|
+
_fanoutChannel;
|
|
279
|
+
_providerHandle;
|
|
235
280
|
_isTrustedReplicator;
|
|
236
281
|
_logProperties;
|
|
237
282
|
_closeController;
|
|
@@ -241,6 +286,12 @@ let SharedLog = (() => {
|
|
|
241
286
|
// public key hash to range id to range
|
|
242
287
|
pendingMaturity; // map of peerId to timeout
|
|
243
288
|
latestReplicationInfoMessage;
|
|
289
|
+
// Peers that have unsubscribed from this log's topic. We ignore replication-info
|
|
290
|
+
// messages from them until we see a new subscription, to avoid re-introducing
|
|
291
|
+
// stale membership state during close/unsubscribe races.
|
|
292
|
+
_replicationInfoBlockedPeers;
|
|
293
|
+
_replicationInfoRequestByPeer;
|
|
294
|
+
_replicationInfoApplyQueueByPeer;
|
|
244
295
|
remoteBlocks;
|
|
245
296
|
openTime;
|
|
246
297
|
oldestOpenTime;
|
|
@@ -252,6 +303,7 @@ let SharedLog = (() => {
|
|
|
252
303
|
responseToPruneDebouncedFn;
|
|
253
304
|
_requestIPruneSent; // tracks entry hash to peer hash for requesting I prune messages
|
|
254
305
|
_requestIPruneResponseReplicatorSet; // tracks entry hash to peer hash
|
|
306
|
+
_checkedPruneRetries;
|
|
255
307
|
replicationChangeDebounceFn;
|
|
256
308
|
// regular distribution checks
|
|
257
309
|
distributeQueue;
|
|
@@ -283,6 +335,492 @@ let SharedLog = (() => {
|
|
|
283
335
|
get v8Behaviour() {
|
|
284
336
|
return (this.compatibility ?? Number.MAX_VALUE) < 9;
|
|
285
337
|
}
|
|
338
|
+
getFanoutChannelOptions(options) {
|
|
339
|
+
return {
|
|
340
|
+
...DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS,
|
|
341
|
+
...(options?.channel ?? {}),
|
|
342
|
+
};
|
|
343
|
+
}
|
|
344
|
+
async _openFanoutChannel(options) {
|
|
345
|
+
this._closeFanoutChannel();
|
|
346
|
+
if (!options) {
|
|
347
|
+
return;
|
|
348
|
+
}
|
|
349
|
+
const fanoutService = this.node.services.fanout;
|
|
350
|
+
if (!fanoutService) {
|
|
351
|
+
throw new Error(`Fanout is configured for shared-log topic ${this.topic}, but no fanout service is available on this client`);
|
|
352
|
+
}
|
|
353
|
+
const resolvedRoot = options.root ??
|
|
354
|
+
(await fanoutService?.topicRootControlPlane?.resolveTopicRoot?.(this.topic));
|
|
355
|
+
if (!resolvedRoot) {
|
|
356
|
+
throw new Error(`Fanout is configured for shared-log topic ${this.topic}, but no fanout root was provided and none could be resolved`);
|
|
357
|
+
}
|
|
358
|
+
const channel = new FanoutChannel(fanoutService, {
|
|
359
|
+
topic: this.topic,
|
|
360
|
+
root: resolvedRoot,
|
|
361
|
+
});
|
|
362
|
+
this._fanoutChannel = channel;
|
|
363
|
+
this._onFanoutDataFn =
|
|
364
|
+
this._onFanoutDataFn ||
|
|
365
|
+
((evt) => {
|
|
366
|
+
const detail = evt?.detail;
|
|
367
|
+
if (!detail) {
|
|
368
|
+
return;
|
|
369
|
+
}
|
|
370
|
+
void this._onFanoutData(detail).catch((error) => logger.error(error));
|
|
371
|
+
});
|
|
372
|
+
channel.addEventListener("data", this._onFanoutDataFn);
|
|
373
|
+
this._onFanoutUnicastFn =
|
|
374
|
+
this._onFanoutUnicastFn ||
|
|
375
|
+
((evt) => {
|
|
376
|
+
const detail = evt?.detail;
|
|
377
|
+
if (!detail) {
|
|
378
|
+
return;
|
|
379
|
+
}
|
|
380
|
+
void this._onFanoutUnicast(detail).catch((error) => logger.error(error));
|
|
381
|
+
});
|
|
382
|
+
channel.addEventListener("unicast", this._onFanoutUnicastFn);
|
|
383
|
+
try {
|
|
384
|
+
const channelOptions = this.getFanoutChannelOptions(options);
|
|
385
|
+
if (resolvedRoot === fanoutService.publicKeyHash) {
|
|
386
|
+
await channel.openAsRoot(channelOptions);
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
389
|
+
await channel.join(channelOptions, options.join);
|
|
390
|
+
}
|
|
391
|
+
catch (error) {
|
|
392
|
+
this._closeFanoutChannel();
|
|
393
|
+
throw error;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
_closeFanoutChannel() {
|
|
397
|
+
if (this._fanoutChannel) {
|
|
398
|
+
if (this._onFanoutDataFn) {
|
|
399
|
+
this._fanoutChannel.removeEventListener("data", this._onFanoutDataFn);
|
|
400
|
+
}
|
|
401
|
+
if (this._onFanoutUnicastFn) {
|
|
402
|
+
this._fanoutChannel.removeEventListener("unicast", this._onFanoutUnicastFn);
|
|
403
|
+
}
|
|
404
|
+
this._fanoutChannel.close();
|
|
405
|
+
}
|
|
406
|
+
this._fanoutChannel = undefined;
|
|
407
|
+
}
|
|
408
|
+
async _onFanoutData(detail) {
|
|
409
|
+
let envelope;
|
|
410
|
+
try {
|
|
411
|
+
envelope = deserialize(detail.payload, FanoutEnvelope);
|
|
412
|
+
}
|
|
413
|
+
catch (error) {
|
|
414
|
+
if (error instanceof BorshError) {
|
|
415
|
+
return;
|
|
416
|
+
}
|
|
417
|
+
throw error;
|
|
418
|
+
}
|
|
419
|
+
let message;
|
|
420
|
+
try {
|
|
421
|
+
message = deserialize(envelope.payload, TransportMessage);
|
|
422
|
+
}
|
|
423
|
+
catch (error) {
|
|
424
|
+
if (error instanceof BorshError) {
|
|
425
|
+
return;
|
|
426
|
+
}
|
|
427
|
+
throw error;
|
|
428
|
+
}
|
|
429
|
+
if (!(message instanceof ExchangeHeadsMessage)) {
|
|
430
|
+
return;
|
|
431
|
+
}
|
|
432
|
+
const from = (await this._resolvePublicKeyFromHash(envelope.from)) ??
|
|
433
|
+
{ hashcode: () => envelope.from };
|
|
434
|
+
const contextMessage = new DataMessage({
|
|
435
|
+
header: new MessageHeader({
|
|
436
|
+
session: 0,
|
|
437
|
+
mode: new AnyWhere(),
|
|
438
|
+
priority: 0,
|
|
439
|
+
}),
|
|
440
|
+
});
|
|
441
|
+
contextMessage.header.timestamp = envelope.timestamp;
|
|
442
|
+
await this.onMessage(message, {
|
|
443
|
+
from,
|
|
444
|
+
message: contextMessage,
|
|
445
|
+
});
|
|
446
|
+
}
|
|
447
|
+
async _onFanoutUnicast(detail) {
|
|
448
|
+
let message;
|
|
449
|
+
try {
|
|
450
|
+
message = deserialize(detail.payload, TransportMessage);
|
|
451
|
+
}
|
|
452
|
+
catch (error) {
|
|
453
|
+
if (error instanceof BorshError) {
|
|
454
|
+
return;
|
|
455
|
+
}
|
|
456
|
+
throw error;
|
|
457
|
+
}
|
|
458
|
+
const fromHash = detail.origin || detail.from;
|
|
459
|
+
const from = (await this._resolvePublicKeyFromHash(fromHash)) ??
|
|
460
|
+
{ hashcode: () => fromHash };
|
|
461
|
+
const contextMessage = new DataMessage({
|
|
462
|
+
header: new MessageHeader({
|
|
463
|
+
session: 0,
|
|
464
|
+
mode: new AnyWhere(),
|
|
465
|
+
priority: 0,
|
|
466
|
+
}),
|
|
467
|
+
});
|
|
468
|
+
contextMessage.header.timestamp = detail.timestamp;
|
|
469
|
+
await this.onMessage(message, {
|
|
470
|
+
from,
|
|
471
|
+
message: contextMessage,
|
|
472
|
+
});
|
|
473
|
+
}
|
|
474
|
+
async _publishExchangeHeadsViaFanout(message) {
|
|
475
|
+
if (!this._fanoutChannel) {
|
|
476
|
+
throw new Error(`No fanout channel configured for shared-log topic ${this.topic}`);
|
|
477
|
+
}
|
|
478
|
+
const envelope = new FanoutEnvelope({
|
|
479
|
+
from: this.node.identity.publicKey.hashcode(),
|
|
480
|
+
timestamp: BigInt(Date.now()),
|
|
481
|
+
payload: serialize(message),
|
|
482
|
+
});
|
|
483
|
+
await this._fanoutChannel.publish(serialize(envelope));
|
|
484
|
+
}
|
|
485
|
+
_parseDeliveryOptions(deliveryArg) {
|
|
486
|
+
const delivery = deliveryArg === undefined || deliveryArg === false
|
|
487
|
+
? undefined
|
|
488
|
+
: deliveryArg === true
|
|
489
|
+
? {}
|
|
490
|
+
: deliveryArg;
|
|
491
|
+
if (!delivery) {
|
|
492
|
+
return {
|
|
493
|
+
delivery: undefined,
|
|
494
|
+
requireRecipients: false,
|
|
495
|
+
settleMin: undefined,
|
|
496
|
+
wrap: undefined,
|
|
497
|
+
};
|
|
498
|
+
}
|
|
499
|
+
const deliverySettle = delivery.settle ?? true;
|
|
500
|
+
const deliveryTimeout = delivery.timeout;
|
|
501
|
+
const deliverySignal = delivery.signal;
|
|
502
|
+
const requireRecipients = delivery.requireRecipients === true;
|
|
503
|
+
const settleMin = typeof deliverySettle === "object" && Number.isFinite(deliverySettle.min)
|
|
504
|
+
? Math.max(0, Math.floor(deliverySettle.min))
|
|
505
|
+
: undefined;
|
|
506
|
+
const wrap = deliveryTimeout == null && deliverySignal == null
|
|
507
|
+
? undefined
|
|
508
|
+
: (promise) => new Promise((resolve, reject) => {
|
|
509
|
+
let settled = false;
|
|
510
|
+
let timer = undefined;
|
|
511
|
+
const onAbort = () => {
|
|
512
|
+
if (settled) {
|
|
513
|
+
return;
|
|
514
|
+
}
|
|
515
|
+
settled = true;
|
|
516
|
+
promise.catch(() => { });
|
|
517
|
+
cleanup();
|
|
518
|
+
reject(new AbortError());
|
|
519
|
+
};
|
|
520
|
+
const cleanup = () => {
|
|
521
|
+
if (timer != null) {
|
|
522
|
+
clearTimeout(timer);
|
|
523
|
+
timer = undefined;
|
|
524
|
+
}
|
|
525
|
+
deliverySignal?.removeEventListener("abort", onAbort);
|
|
526
|
+
};
|
|
527
|
+
if (deliverySignal) {
|
|
528
|
+
if (deliverySignal.aborted) {
|
|
529
|
+
onAbort();
|
|
530
|
+
return;
|
|
531
|
+
}
|
|
532
|
+
deliverySignal.addEventListener("abort", onAbort);
|
|
533
|
+
}
|
|
534
|
+
if (deliveryTimeout != null) {
|
|
535
|
+
timer = setTimeout(() => {
|
|
536
|
+
if (settled) {
|
|
537
|
+
return;
|
|
538
|
+
}
|
|
539
|
+
settled = true;
|
|
540
|
+
promise.catch(() => { });
|
|
541
|
+
cleanup();
|
|
542
|
+
reject(new TimeoutError(`Timeout waiting for delivery`));
|
|
543
|
+
}, deliveryTimeout);
|
|
544
|
+
}
|
|
545
|
+
promise
|
|
546
|
+
.then(() => {
|
|
547
|
+
if (settled) {
|
|
548
|
+
return;
|
|
549
|
+
}
|
|
550
|
+
settled = true;
|
|
551
|
+
cleanup();
|
|
552
|
+
resolve();
|
|
553
|
+
})
|
|
554
|
+
.catch((error) => {
|
|
555
|
+
if (settled) {
|
|
556
|
+
return;
|
|
557
|
+
}
|
|
558
|
+
settled = true;
|
|
559
|
+
cleanup();
|
|
560
|
+
reject(error);
|
|
561
|
+
});
|
|
562
|
+
});
|
|
563
|
+
return {
|
|
564
|
+
delivery,
|
|
565
|
+
requireRecipients,
|
|
566
|
+
settleMin,
|
|
567
|
+
wrap,
|
|
568
|
+
};
|
|
569
|
+
}
|
|
570
|
+
async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
|
|
571
|
+
const { delivery, requireRecipients, settleMin, wrap } = this._parseDeliveryOptions(deliveryArg);
|
|
572
|
+
const pending = [];
|
|
573
|
+
const track = (promise) => {
|
|
574
|
+
pending.push(wrap ? wrap(promise) : promise);
|
|
575
|
+
};
|
|
576
|
+
const fanoutUnicastOptions = delivery?.timeout != null || delivery?.signal != null
|
|
577
|
+
? { timeoutMs: delivery.timeout, signal: delivery.signal }
|
|
578
|
+
: undefined;
|
|
579
|
+
for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
|
|
580
|
+
await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
|
|
581
|
+
const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
|
|
582
|
+
const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
|
|
583
|
+
const hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
584
|
+
if (!hasRemotePeers) {
|
|
585
|
+
if (requireRecipients) {
|
|
586
|
+
throw new NoPeersError(this.rpc.topic);
|
|
587
|
+
}
|
|
588
|
+
continue;
|
|
589
|
+
}
|
|
590
|
+
if (!delivery) {
|
|
591
|
+
this.rpc
|
|
592
|
+
.send(message, {
|
|
593
|
+
mode: isLeader
|
|
594
|
+
? new SilentDelivery({ redundancy: 1, to: set })
|
|
595
|
+
: new AcknowledgeDelivery({ redundancy: 1, to: set }),
|
|
596
|
+
})
|
|
597
|
+
.catch((error) => logger.error(error));
|
|
598
|
+
continue;
|
|
599
|
+
}
|
|
600
|
+
const orderedRemoteRecipients = [];
|
|
601
|
+
for (const peer of leadersForDelivery) {
|
|
602
|
+
if (peer === selfHash) {
|
|
603
|
+
continue;
|
|
604
|
+
}
|
|
605
|
+
orderedRemoteRecipients.push(peer);
|
|
606
|
+
}
|
|
607
|
+
for (const peer of set) {
|
|
608
|
+
if (peer === selfHash) {
|
|
609
|
+
continue;
|
|
610
|
+
}
|
|
611
|
+
if (leadersForDelivery.has(peer)) {
|
|
612
|
+
continue;
|
|
613
|
+
}
|
|
614
|
+
orderedRemoteRecipients.push(peer);
|
|
615
|
+
}
|
|
616
|
+
const ackTo = [];
|
|
617
|
+
let silentTo;
|
|
618
|
+
// Default delivery semantics: require enough remote ACKs to reach the requested
|
|
619
|
+
// replication degree (local append counts as 1).
|
|
620
|
+
const ackLimit = settleMin == null ? Math.max(0, minReplicasValue - 1) : settleMin;
|
|
621
|
+
for (const peer of orderedRemoteRecipients) {
|
|
622
|
+
if (ackTo.length < ackLimit) {
|
|
623
|
+
ackTo.push(peer);
|
|
624
|
+
}
|
|
625
|
+
else {
|
|
626
|
+
silentTo ||= [];
|
|
627
|
+
silentTo.push(peer);
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
if (requireRecipients && orderedRemoteRecipients.length === 0) {
|
|
631
|
+
throw new NoPeersError(this.rpc.topic);
|
|
632
|
+
}
|
|
633
|
+
if (requireRecipients && ackTo.length + (silentTo?.length || 0) === 0) {
|
|
634
|
+
throw new NoPeersError(this.rpc.topic);
|
|
635
|
+
}
|
|
636
|
+
if (ackTo.length > 0) {
|
|
637
|
+
const payload = serialize(message);
|
|
638
|
+
for (const peer of ackTo) {
|
|
639
|
+
track((async () => {
|
|
640
|
+
// Unified decision point:
|
|
641
|
+
// - If we can prove a cheap direct path (connected or routed), use it.
|
|
642
|
+
// - Otherwise, fall back to the fanout unicast ACK path (bounded overlay routing).
|
|
643
|
+
// - If that fails, fall back to pubsub/RPC routing which may flood to discover routes.
|
|
644
|
+
const pubsub = this.node.services.pubsub;
|
|
645
|
+
const canDirectFast = Boolean(pubsub?.peers?.get?.(peer)?.isWritable) ||
|
|
646
|
+
Boolean(pubsub?.routes?.isReachable?.(pubsub?.publicKeyHash, peer, 0));
|
|
647
|
+
if (canDirectFast) {
|
|
648
|
+
await this.rpc.send(message, {
|
|
649
|
+
mode: new AcknowledgeDelivery({
|
|
650
|
+
redundancy: 1,
|
|
651
|
+
to: [peer],
|
|
652
|
+
}),
|
|
653
|
+
});
|
|
654
|
+
return;
|
|
655
|
+
}
|
|
656
|
+
if (this._fanoutChannel) {
|
|
657
|
+
try {
|
|
658
|
+
await this._fanoutChannel.unicastToAck(peer, payload, fanoutUnicastOptions);
|
|
659
|
+
return;
|
|
660
|
+
}
|
|
661
|
+
catch {
|
|
662
|
+
// fall back below
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
await this.rpc.send(message, {
|
|
666
|
+
mode: new AcknowledgeDelivery({
|
|
667
|
+
redundancy: 1,
|
|
668
|
+
to: [peer],
|
|
669
|
+
}),
|
|
670
|
+
});
|
|
671
|
+
})());
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
if (silentTo?.length) {
|
|
675
|
+
this.rpc
|
|
676
|
+
.send(message, {
|
|
677
|
+
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
678
|
+
})
|
|
679
|
+
.catch((error) => logger.error(error));
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
if (pending.length > 0) {
|
|
683
|
+
await Promise.all(pending);
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
async _mergeLeadersFromGidReferences(message, minReplicasValue, leaders) {
|
|
687
|
+
const gidReferences = message.heads[0]?.gidRefrences;
|
|
688
|
+
if (!gidReferences || gidReferences.length === 0) {
|
|
689
|
+
return;
|
|
690
|
+
}
|
|
691
|
+
for (const gidReference of gidReferences) {
|
|
692
|
+
const entryFromGid = this.log.entryIndex.getHeads(gidReference, false);
|
|
693
|
+
for (const gidEntry of await entryFromGid.all()) {
|
|
694
|
+
let coordinates = await this.getCoordinates(gidEntry);
|
|
695
|
+
if (coordinates == null) {
|
|
696
|
+
coordinates = await this.createCoordinates(gidEntry, minReplicasValue);
|
|
697
|
+
}
|
|
698
|
+
const found = await this._findLeaders(coordinates);
|
|
699
|
+
for (const [key, value] of found) {
|
|
700
|
+
leaders.set(key, value);
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
async _appendDeliverToAllFanout(entry) {
|
|
706
|
+
for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
|
|
707
|
+
await this._publishExchangeHeadsViaFanout(message);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
async _resolvePublicKeyFromHash(hash) {
|
|
711
|
+
const fanoutService = this.node.services.fanout;
|
|
712
|
+
return (fanoutService?.getPublicKey?.(hash) ??
|
|
713
|
+
this.node.services.pubsub.getPublicKey(hash));
|
|
714
|
+
}
|
|
715
|
+
async _getTopicSubscribers(topic) {
|
|
716
|
+
const maxPeers = 64;
|
|
717
|
+
// Prefer the bounded peer set we already know from the fanout overlay.
|
|
718
|
+
if (this._fanoutChannel && (topic === this.topic || topic === this.rpc.topic)) {
|
|
719
|
+
const hashes = this._fanoutChannel
|
|
720
|
+
.getPeerHashes({ includeSelf: false })
|
|
721
|
+
.slice(0, maxPeers);
|
|
722
|
+
if (hashes.length === 0)
|
|
723
|
+
return [];
|
|
724
|
+
const keys = await Promise.all(hashes.map((hash) => this._resolvePublicKeyFromHash(hash)));
|
|
725
|
+
const uniqueKeys = [];
|
|
726
|
+
const seen = new Set();
|
|
727
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
728
|
+
for (const key of keys) {
|
|
729
|
+
if (!key)
|
|
730
|
+
continue;
|
|
731
|
+
const hash = key.hashcode();
|
|
732
|
+
if (hash === selfHash)
|
|
733
|
+
continue;
|
|
734
|
+
if (seen.has(hash))
|
|
735
|
+
continue;
|
|
736
|
+
seen.add(hash);
|
|
737
|
+
uniqueKeys.push(key);
|
|
738
|
+
}
|
|
739
|
+
return uniqueKeys;
|
|
740
|
+
}
|
|
741
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
742
|
+
const hashes = [];
|
|
743
|
+
// Best-effort provider discovery (bounded). This requires bootstrap trackers.
|
|
744
|
+
try {
|
|
745
|
+
const fanoutService = this.node.services.fanout;
|
|
746
|
+
if (fanoutService?.queryProviders) {
|
|
747
|
+
const ns = `shared-log|${this.topic}`;
|
|
748
|
+
const seed = hashToSeed32(topic);
|
|
749
|
+
const providers = await fanoutService.queryProviders(ns, {
|
|
750
|
+
want: maxPeers,
|
|
751
|
+
seed,
|
|
752
|
+
});
|
|
753
|
+
for (const h of providers ?? []) {
|
|
754
|
+
if (!h || h === selfHash)
|
|
755
|
+
continue;
|
|
756
|
+
hashes.push(h);
|
|
757
|
+
if (hashes.length >= maxPeers)
|
|
758
|
+
break;
|
|
759
|
+
}
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
catch {
|
|
763
|
+
// Best-effort only.
|
|
764
|
+
}
|
|
765
|
+
// Next, use already-connected peer streams (bounded and cheap).
|
|
766
|
+
const peerMap = this.node.services.pubsub
|
|
767
|
+
?.peers;
|
|
768
|
+
if (peerMap?.keys) {
|
|
769
|
+
for (const h of peerMap.keys()) {
|
|
770
|
+
if (!h || h === selfHash)
|
|
771
|
+
continue;
|
|
772
|
+
hashes.push(h);
|
|
773
|
+
if (hashes.length >= maxPeers)
|
|
774
|
+
break;
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
// Finally, fall back to libp2p connections (e.g. bootstrap peers) without requiring
|
|
778
|
+
// any global topic membership view.
|
|
779
|
+
if (hashes.length < maxPeers) {
|
|
780
|
+
const connectionManager = this.node.services.pubsub?.components
|
|
781
|
+
?.connectionManager;
|
|
782
|
+
const connections = connectionManager?.getConnections?.() ?? [];
|
|
783
|
+
for (const conn of connections) {
|
|
784
|
+
const peerId = conn?.remotePeer;
|
|
785
|
+
if (!peerId)
|
|
786
|
+
continue;
|
|
787
|
+
try {
|
|
788
|
+
const h = getPublicKeyFromPeerId(peerId).hashcode();
|
|
789
|
+
if (!h || h === selfHash)
|
|
790
|
+
continue;
|
|
791
|
+
hashes.push(h);
|
|
792
|
+
if (hashes.length >= maxPeers)
|
|
793
|
+
break;
|
|
794
|
+
}
|
|
795
|
+
catch {
|
|
796
|
+
// Best-effort only.
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
if (hashes.length === 0)
|
|
801
|
+
return [];
|
|
802
|
+
const uniqueHashes = [];
|
|
803
|
+
const seen = new Set();
|
|
804
|
+
for (const h of hashes) {
|
|
805
|
+
if (seen.has(h))
|
|
806
|
+
continue;
|
|
807
|
+
seen.add(h);
|
|
808
|
+
uniqueHashes.push(h);
|
|
809
|
+
if (uniqueHashes.length >= maxPeers)
|
|
810
|
+
break;
|
|
811
|
+
}
|
|
812
|
+
const keys = await Promise.all(uniqueHashes.map((hash) => this._resolvePublicKeyFromHash(hash)));
|
|
813
|
+
const uniqueKeys = [];
|
|
814
|
+
for (const key of keys) {
|
|
815
|
+
if (!key)
|
|
816
|
+
continue;
|
|
817
|
+
const hash = key.hashcode();
|
|
818
|
+
if (hash === selfHash)
|
|
819
|
+
continue;
|
|
820
|
+
uniqueKeys.push(key);
|
|
821
|
+
}
|
|
822
|
+
return uniqueKeys;
|
|
823
|
+
}
|
|
286
824
|
// @deprecated
|
|
287
825
|
async getRole() {
|
|
288
826
|
const segments = await this.getMyReplicationSegments();
|
|
@@ -586,6 +1124,7 @@ let SharedLog = (() => {
|
|
|
586
1124
|
})
|
|
587
1125
|
.all();
|
|
588
1126
|
this.uniqueReplicators.delete(keyHash);
|
|
1127
|
+
this._replicatorJoinEmitted.delete(keyHash);
|
|
589
1128
|
await this.replicationIndex.del({ query: { hash: keyHash } });
|
|
590
1129
|
await this.updateOldestTimestampFromIndex();
|
|
591
1130
|
const isMe = this.node.identity.publicKey.hashcode() === keyHash;
|
|
@@ -668,6 +1207,7 @@ let SharedLog = (() => {
|
|
|
668
1207
|
const otherSegmentsIterator = this.replicationIndex.iterate({ query: { hash: from.hashcode() } }, { shape: { id: true } });
|
|
669
1208
|
if ((await otherSegmentsIterator.next(1)).length === 0) {
|
|
670
1209
|
this.uniqueReplicators.delete(from.hashcode());
|
|
1210
|
+
this._replicatorJoinEmitted.delete(from.hashcode());
|
|
671
1211
|
}
|
|
672
1212
|
await otherSegmentsIterator.close();
|
|
673
1213
|
await this.updateOldestTimestampFromIndex();
|
|
@@ -687,6 +1227,7 @@ let SharedLog = (() => {
|
|
|
687
1227
|
rebalance = rebalance == null ? true : rebalance;
|
|
688
1228
|
let diffs;
|
|
689
1229
|
let deleted = undefined;
|
|
1230
|
+
let isStoppedReplicating = false;
|
|
690
1231
|
if (reset) {
|
|
691
1232
|
deleted = (await this.replicationIndex
|
|
692
1233
|
.iterate({
|
|
@@ -718,6 +1259,7 @@ let SharedLog = (() => {
|
|
|
718
1259
|
];
|
|
719
1260
|
}
|
|
720
1261
|
isNewReplicator = prevCount === 0 && ranges.length > 0;
|
|
1262
|
+
isStoppedReplicating = prevCount > 0 && ranges.length === 0;
|
|
721
1263
|
}
|
|
722
1264
|
else {
|
|
723
1265
|
let batchSize = 100;
|
|
@@ -791,7 +1333,16 @@ let SharedLog = (() => {
|
|
|
791
1333
|
.flat();
|
|
792
1334
|
diffs = changes;
|
|
793
1335
|
}
|
|
794
|
-
|
|
1336
|
+
const fromHash = from.hashcode();
|
|
1337
|
+
// Track replicator membership transitions synchronously so join/leave events are
|
|
1338
|
+
// idempotent even if we process concurrent reset messages/unsubscribes.
|
|
1339
|
+
const stoppedTransition = ranges.length === 0 ? this.uniqueReplicators.delete(fromHash) : false;
|
|
1340
|
+
if (ranges.length === 0) {
|
|
1341
|
+
this._replicatorJoinEmitted.delete(fromHash);
|
|
1342
|
+
}
|
|
1343
|
+
else {
|
|
1344
|
+
this.uniqueReplicators.add(fromHash);
|
|
1345
|
+
}
|
|
795
1346
|
let now = +new Date();
|
|
796
1347
|
let minRoleAge = await this.getDefaultMinRoleAge();
|
|
797
1348
|
let isAllMature = true;
|
|
@@ -867,15 +1418,23 @@ let SharedLog = (() => {
|
|
|
867
1418
|
detail: { publicKey: from },
|
|
868
1419
|
}));
|
|
869
1420
|
if (isNewReplicator) {
|
|
870
|
-
this.
|
|
871
|
-
|
|
872
|
-
|
|
1421
|
+
if (!this._replicatorJoinEmitted.has(fromHash)) {
|
|
1422
|
+
this._replicatorJoinEmitted.add(fromHash);
|
|
1423
|
+
this.events.dispatchEvent(new CustomEvent("replicator:join", {
|
|
1424
|
+
detail: { publicKey: from },
|
|
1425
|
+
}));
|
|
1426
|
+
}
|
|
873
1427
|
if (isAllMature) {
|
|
874
1428
|
this.events.dispatchEvent(new CustomEvent("replicator:mature", {
|
|
875
1429
|
detail: { publicKey: from },
|
|
876
1430
|
}));
|
|
877
1431
|
}
|
|
878
1432
|
}
|
|
1433
|
+
if (isStoppedReplicating && stoppedTransition) {
|
|
1434
|
+
this.events.dispatchEvent(new CustomEvent("replicator:leave", {
|
|
1435
|
+
detail: { publicKey: from },
|
|
1436
|
+
}));
|
|
1437
|
+
}
|
|
879
1438
|
if (rebalance) {
|
|
880
1439
|
for (const diff of diffs) {
|
|
881
1440
|
this.replicationChangeDebounceFn.add(diff);
|
|
@@ -895,6 +1454,20 @@ let SharedLog = (() => {
|
|
|
895
1454
|
if (change) {
|
|
896
1455
|
let addedOrReplaced = change.filter((x) => x.type !== "removed");
|
|
897
1456
|
if (addedOrReplaced.length > 0) {
|
|
1457
|
+
// Provider discovery keep-alive (best-effort). This enables bounded targeted fetches
|
|
1458
|
+
// without relying on any global subscriber list.
|
|
1459
|
+
try {
|
|
1460
|
+
const fanoutService = this.node.services.fanout;
|
|
1461
|
+
if (fanoutService?.provide && !this._providerHandle) {
|
|
1462
|
+
this._providerHandle = fanoutService.provide(`shared-log|${this.topic}`, {
|
|
1463
|
+
ttlMs: 120_000,
|
|
1464
|
+
announceIntervalMs: 60_000,
|
|
1465
|
+
});
|
|
1466
|
+
}
|
|
1467
|
+
}
|
|
1468
|
+
catch {
|
|
1469
|
+
// Best-effort only.
|
|
1470
|
+
}
|
|
898
1471
|
let message = undefined;
|
|
899
1472
|
if (options.reset) {
|
|
900
1473
|
message = new AllReplicatingSegmentsMessage({
|
|
@@ -954,6 +1527,75 @@ let SharedLog = (() => {
|
|
|
954
1527
|
return this.pruneDebouncedFn.add(args);
|
|
955
1528
|
}
|
|
956
1529
|
}
|
|
1530
|
+
clearCheckedPruneRetry(hash) {
|
|
1531
|
+
const state = this._checkedPruneRetries.get(hash);
|
|
1532
|
+
if (state?.timer) {
|
|
1533
|
+
clearTimeout(state.timer);
|
|
1534
|
+
}
|
|
1535
|
+
this._checkedPruneRetries.delete(hash);
|
|
1536
|
+
}
|
|
1537
|
+
scheduleCheckedPruneRetry(args) {
|
|
1538
|
+
if (this.closed)
|
|
1539
|
+
return;
|
|
1540
|
+
if (this._pendingDeletes.has(args.entry.hash))
|
|
1541
|
+
return;
|
|
1542
|
+
const hash = args.entry.hash;
|
|
1543
|
+
const state = this._checkedPruneRetries.get(hash) ?? { attempts: 0 };
|
|
1544
|
+
if (state.timer)
|
|
1545
|
+
return;
|
|
1546
|
+
if (state.attempts >= CHECKED_PRUNE_RETRY_MAX_ATTEMPTS) {
|
|
1547
|
+
// Avoid unbounded background retries; a new replication-change event can
|
|
1548
|
+
// always re-enqueue pruning with fresh leader info.
|
|
1549
|
+
return;
|
|
1550
|
+
}
|
|
1551
|
+
const attempt = state.attempts + 1;
|
|
1552
|
+
const jitterMs = Math.floor(Math.random() * 250);
|
|
1553
|
+
const delayMs = Math.min(CHECKED_PRUNE_RETRY_MAX_DELAY_MS, 1_000 * 2 ** (attempt - 1) + jitterMs);
|
|
1554
|
+
state.attempts = attempt;
|
|
1555
|
+
state.timer = setTimeout(async () => {
|
|
1556
|
+
const st = this._checkedPruneRetries.get(hash);
|
|
1557
|
+
if (st)
|
|
1558
|
+
st.timer = undefined;
|
|
1559
|
+
if (this.closed)
|
|
1560
|
+
return;
|
|
1561
|
+
if (this._pendingDeletes.has(hash))
|
|
1562
|
+
return;
|
|
1563
|
+
let leadersMap;
|
|
1564
|
+
try {
|
|
1565
|
+
const replicas = decodeReplicas(args.entry).getValue(this);
|
|
1566
|
+
leadersMap = await this.findLeadersFromEntry(args.entry, replicas, {
|
|
1567
|
+
roleAge: 0,
|
|
1568
|
+
});
|
|
1569
|
+
}
|
|
1570
|
+
catch {
|
|
1571
|
+
// Best-effort only.
|
|
1572
|
+
}
|
|
1573
|
+
if (!leadersMap || leadersMap.size === 0) {
|
|
1574
|
+
if (args.leaders instanceof Map) {
|
|
1575
|
+
leadersMap = args.leaders;
|
|
1576
|
+
}
|
|
1577
|
+
else {
|
|
1578
|
+
leadersMap = new Map();
|
|
1579
|
+
for (const k of args.leaders) {
|
|
1580
|
+
leadersMap.set(k, { intersecting: true });
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1584
|
+
try {
|
|
1585
|
+
const leadersForRetry = leadersMap ?? new Map();
|
|
1586
|
+
await this.pruneDebouncedFnAddIfNotKeeping({
|
|
1587
|
+
key: hash,
|
|
1588
|
+
// TODO types
|
|
1589
|
+
value: { entry: args.entry, leaders: leadersForRetry },
|
|
1590
|
+
});
|
|
1591
|
+
}
|
|
1592
|
+
catch {
|
|
1593
|
+
// Best-effort only; pruning will be re-attempted on future changes.
|
|
1594
|
+
}
|
|
1595
|
+
}, delayMs);
|
|
1596
|
+
state.timer.unref?.();
|
|
1597
|
+
this._checkedPruneRetries.set(hash, state);
|
|
1598
|
+
}
|
|
957
1599
|
async append(data, options) {
|
|
958
1600
|
const appendOptions = { ...options };
|
|
959
1601
|
const minReplicas = this.getClampedReplicas(options?.replicas
|
|
@@ -1002,239 +1644,18 @@ let SharedLog = (() => {
|
|
|
1002
1644
|
if (options?.target !== "none") {
|
|
1003
1645
|
const target = options?.target;
|
|
1004
1646
|
const deliveryArg = options?.delivery;
|
|
1005
|
-
const
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
? {}
|
|
1009
|
-
: deliveryArg;
|
|
1010
|
-
let requireRecipients = false;
|
|
1011
|
-
let settleMin;
|
|
1012
|
-
let guardDelivery = undefined;
|
|
1013
|
-
let firstDeliveryPromise;
|
|
1014
|
-
let deliveryPromises;
|
|
1015
|
-
let addDeliveryPromise;
|
|
1016
|
-
const leadersForDelivery = delivery && (target === "replicators" || !target)
|
|
1017
|
-
? new Set(leaders.keys())
|
|
1018
|
-
: undefined;
|
|
1019
|
-
if (delivery) {
|
|
1020
|
-
const deliverySettle = delivery.settle ?? true;
|
|
1021
|
-
const deliveryTimeout = delivery.timeout;
|
|
1022
|
-
const deliverySignal = delivery.signal;
|
|
1023
|
-
requireRecipients = delivery.requireRecipients === true;
|
|
1024
|
-
settleMin =
|
|
1025
|
-
typeof deliverySettle === "object" &&
|
|
1026
|
-
Number.isFinite(deliverySettle.min)
|
|
1027
|
-
? Math.max(0, Math.floor(deliverySettle.min))
|
|
1028
|
-
: undefined;
|
|
1029
|
-
guardDelivery =
|
|
1030
|
-
deliveryTimeout == null && deliverySignal == null
|
|
1031
|
-
? undefined
|
|
1032
|
-
: (promise) => new Promise((resolve, reject) => {
|
|
1033
|
-
let settled = false;
|
|
1034
|
-
let timer = undefined;
|
|
1035
|
-
const onAbort = () => {
|
|
1036
|
-
if (settled) {
|
|
1037
|
-
return;
|
|
1038
|
-
}
|
|
1039
|
-
settled = true;
|
|
1040
|
-
promise.catch(() => { });
|
|
1041
|
-
cleanup();
|
|
1042
|
-
reject(new AbortError());
|
|
1043
|
-
};
|
|
1044
|
-
const cleanup = () => {
|
|
1045
|
-
if (timer != null) {
|
|
1046
|
-
clearTimeout(timer);
|
|
1047
|
-
timer = undefined;
|
|
1048
|
-
}
|
|
1049
|
-
deliverySignal?.removeEventListener("abort", onAbort);
|
|
1050
|
-
};
|
|
1051
|
-
if (deliverySignal) {
|
|
1052
|
-
if (deliverySignal.aborted) {
|
|
1053
|
-
onAbort();
|
|
1054
|
-
return;
|
|
1055
|
-
}
|
|
1056
|
-
deliverySignal.addEventListener("abort", onAbort);
|
|
1057
|
-
}
|
|
1058
|
-
if (deliveryTimeout != null) {
|
|
1059
|
-
timer = setTimeout(() => {
|
|
1060
|
-
if (settled) {
|
|
1061
|
-
return;
|
|
1062
|
-
}
|
|
1063
|
-
settled = true;
|
|
1064
|
-
promise.catch(() => { });
|
|
1065
|
-
cleanup();
|
|
1066
|
-
reject(new TimeoutError(`Timeout waiting for delivery`));
|
|
1067
|
-
}, deliveryTimeout);
|
|
1068
|
-
}
|
|
1069
|
-
promise
|
|
1070
|
-
.then(() => {
|
|
1071
|
-
if (settled) {
|
|
1072
|
-
return;
|
|
1073
|
-
}
|
|
1074
|
-
settled = true;
|
|
1075
|
-
cleanup();
|
|
1076
|
-
resolve();
|
|
1077
|
-
})
|
|
1078
|
-
.catch((e) => {
|
|
1079
|
-
if (settled) {
|
|
1080
|
-
return;
|
|
1081
|
-
}
|
|
1082
|
-
settled = true;
|
|
1083
|
-
cleanup();
|
|
1084
|
-
reject(e);
|
|
1085
|
-
});
|
|
1086
|
-
});
|
|
1087
|
-
addDeliveryPromise = (promise) => {
|
|
1088
|
-
if (!firstDeliveryPromise) {
|
|
1089
|
-
firstDeliveryPromise = promise;
|
|
1090
|
-
return;
|
|
1091
|
-
}
|
|
1092
|
-
if (!deliveryPromises) {
|
|
1093
|
-
deliveryPromises = [firstDeliveryPromise, promise];
|
|
1094
|
-
firstDeliveryPromise = undefined;
|
|
1095
|
-
return;
|
|
1096
|
-
}
|
|
1097
|
-
deliveryPromises.push(promise);
|
|
1098
|
-
};
|
|
1647
|
+
const hasDelivery = !(deliveryArg === undefined || deliveryArg === false);
|
|
1648
|
+
if (target === "all" && hasDelivery) {
|
|
1649
|
+
throw new Error(`delivery options are not supported with target="all"; fanout broadcast is fire-and-forward`);
|
|
1099
1650
|
}
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
])) {
|
|
1103
|
-
if (target === "replicators" || !target) {
|
|
1104
|
-
if (message.heads[0].gidRefrences.length > 0) {
|
|
1105
|
-
for (const ref of message.heads[0].gidRefrences) {
|
|
1106
|
-
const entryFromGid = this.log.entryIndex.getHeads(ref, false);
|
|
1107
|
-
for (const entry of await entryFromGid.all()) {
|
|
1108
|
-
let coordinates = await this.getCoordinates(entry);
|
|
1109
|
-
if (coordinates == null) {
|
|
1110
|
-
coordinates = await this.createCoordinates(entry, minReplicasValue);
|
|
1111
|
-
// TODO are we every to come here?
|
|
1112
|
-
}
|
|
1113
|
-
const result = await this._findLeaders(coordinates);
|
|
1114
|
-
for (const [k, v] of result) {
|
|
1115
|
-
leaders.set(k, v);
|
|
1116
|
-
}
|
|
1117
|
-
}
|
|
1118
|
-
}
|
|
1119
|
-
}
|
|
1120
|
-
const set = this.addPeersToGidPeerHistory(result.entry.meta.gid, leaders.keys());
|
|
1121
|
-
let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
1122
|
-
if (!hasRemotePeers) {
|
|
1123
|
-
if (requireRecipients) {
|
|
1124
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1125
|
-
}
|
|
1126
|
-
continue;
|
|
1127
|
-
}
|
|
1128
|
-
if (!delivery) {
|
|
1129
|
-
this.rpc
|
|
1130
|
-
.send(message, {
|
|
1131
|
-
mode: isLeader
|
|
1132
|
-
? new SilentDelivery({ redundancy: 1, to: set })
|
|
1133
|
-
: new AcknowledgeDelivery({ redundancy: 1, to: set }),
|
|
1134
|
-
})
|
|
1135
|
-
.catch((e) => logger.error(e));
|
|
1136
|
-
continue;
|
|
1137
|
-
}
|
|
1138
|
-
let expectedRemoteRecipientsCount = 0;
|
|
1139
|
-
const ackTo = [];
|
|
1140
|
-
let silentTo;
|
|
1141
|
-
const ackLimit = settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
|
|
1142
|
-
// Always settle towards the current expected replicators for this entry,
|
|
1143
|
-
// not the entire gid peer history.
|
|
1144
|
-
for (const peer of leadersForDelivery) {
|
|
1145
|
-
if (peer === selfHash) {
|
|
1146
|
-
continue;
|
|
1147
|
-
}
|
|
1148
|
-
expectedRemoteRecipientsCount++;
|
|
1149
|
-
if (ackTo.length < ackLimit) {
|
|
1150
|
-
ackTo.push(peer);
|
|
1151
|
-
}
|
|
1152
|
-
else {
|
|
1153
|
-
silentTo ||= [];
|
|
1154
|
-
silentTo.push(peer);
|
|
1155
|
-
}
|
|
1156
|
-
}
|
|
1157
|
-
// Still deliver to known peers for the gid (best-effort), but don't let them
|
|
1158
|
-
// satisfy the settle requirement.
|
|
1159
|
-
for (const peer of set) {
|
|
1160
|
-
if (peer === selfHash) {
|
|
1161
|
-
continue;
|
|
1162
|
-
}
|
|
1163
|
-
if (leadersForDelivery.has(peer)) {
|
|
1164
|
-
continue;
|
|
1165
|
-
}
|
|
1166
|
-
silentTo ||= [];
|
|
1167
|
-
silentTo.push(peer);
|
|
1168
|
-
}
|
|
1169
|
-
if (requireRecipients && expectedRemoteRecipientsCount === 0) {
|
|
1170
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1171
|
-
}
|
|
1172
|
-
if (requireRecipients &&
|
|
1173
|
-
ackTo.length + (silentTo?.length || 0) === 0) {
|
|
1174
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1175
|
-
}
|
|
1176
|
-
if (ackTo.length > 0) {
|
|
1177
|
-
const promise = this.rpc.send(message, {
|
|
1178
|
-
mode: new AcknowledgeDelivery({
|
|
1179
|
-
redundancy: 1,
|
|
1180
|
-
to: ackTo,
|
|
1181
|
-
}),
|
|
1182
|
-
});
|
|
1183
|
-
addDeliveryPromise(guardDelivery ? guardDelivery(promise) : promise);
|
|
1184
|
-
}
|
|
1185
|
-
if (silentTo?.length) {
|
|
1186
|
-
this.rpc
|
|
1187
|
-
.send(message, {
|
|
1188
|
-
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
1189
|
-
})
|
|
1190
|
-
.catch((e) => logger.error(e));
|
|
1191
|
-
}
|
|
1192
|
-
}
|
|
1193
|
-
else {
|
|
1194
|
-
if (!delivery) {
|
|
1195
|
-
this.rpc.send(message).catch((e) => logger.error(e));
|
|
1196
|
-
continue;
|
|
1197
|
-
}
|
|
1198
|
-
const subscribers = await this.node.services.pubsub.getSubscribers(this.rpc.topic);
|
|
1199
|
-
const ackTo = [];
|
|
1200
|
-
let silentTo;
|
|
1201
|
-
const ackLimit = settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
|
|
1202
|
-
for (const subscriber of subscribers || []) {
|
|
1203
|
-
if (subscriber.hashcode() === selfHash) {
|
|
1204
|
-
continue;
|
|
1205
|
-
}
|
|
1206
|
-
if (ackTo.length < ackLimit) {
|
|
1207
|
-
ackTo.push(subscriber);
|
|
1208
|
-
}
|
|
1209
|
-
else {
|
|
1210
|
-
silentTo ||= [];
|
|
1211
|
-
silentTo.push(subscriber);
|
|
1212
|
-
}
|
|
1213
|
-
}
|
|
1214
|
-
if (requireRecipients &&
|
|
1215
|
-
ackTo.length + (silentTo?.length || 0) === 0) {
|
|
1216
|
-
throw new NoPeersError(this.rpc.topic);
|
|
1217
|
-
}
|
|
1218
|
-
if (ackTo.length > 0) {
|
|
1219
|
-
const promise = this.rpc.send(message, {
|
|
1220
|
-
mode: new AcknowledgeDelivery({ redundancy: 1, to: ackTo }),
|
|
1221
|
-
});
|
|
1222
|
-
addDeliveryPromise(guardDelivery ? guardDelivery(promise) : promise);
|
|
1223
|
-
}
|
|
1224
|
-
if (silentTo?.length) {
|
|
1225
|
-
this.rpc
|
|
1226
|
-
.send(message, {
|
|
1227
|
-
mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
|
|
1228
|
-
})
|
|
1229
|
-
.catch((e) => logger.error(e));
|
|
1230
|
-
}
|
|
1231
|
-
}
|
|
1651
|
+
if (target === "all" && !this._fanoutChannel) {
|
|
1652
|
+
throw new Error(`No fanout channel configured for shared-log topic ${this.topic}`);
|
|
1232
1653
|
}
|
|
1233
|
-
if (
|
|
1234
|
-
await
|
|
1654
|
+
if (target === "all") {
|
|
1655
|
+
await this._appendDeliverToAllFanout(result.entry);
|
|
1235
1656
|
}
|
|
1236
|
-
else
|
|
1237
|
-
await
|
|
1657
|
+
else {
|
|
1658
|
+
await this._appendDeliverToReplicators(result.entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
|
|
1238
1659
|
}
|
|
1239
1660
|
}
|
|
1240
1661
|
if (!isLeader) {
|
|
@@ -1269,9 +1690,13 @@ let SharedLog = (() => {
|
|
|
1269
1690
|
this._pendingDeletes = new Map();
|
|
1270
1691
|
this._pendingIHave = new Map();
|
|
1271
1692
|
this.latestReplicationInfoMessage = new Map();
|
|
1693
|
+
this._replicationInfoBlockedPeers = new Set();
|
|
1694
|
+
this._replicationInfoRequestByPeer = new Map();
|
|
1695
|
+
this._replicationInfoApplyQueueByPeer = new Map();
|
|
1272
1696
|
this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
|
|
1273
1697
|
this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
|
|
1274
1698
|
this.uniqueReplicators = new Set();
|
|
1699
|
+
this._replicatorJoinEmitted = new Set();
|
|
1275
1700
|
this._replicatorsReconciled = false;
|
|
1276
1701
|
this.openTime = +new Date();
|
|
1277
1702
|
this.oldestOpenTime = this.openTime;
|
|
@@ -1298,18 +1723,70 @@ let SharedLog = (() => {
|
|
|
1298
1723
|
throw new Error("waitForReplicatorRequestMaxAttempts must be a positive number");
|
|
1299
1724
|
}
|
|
1300
1725
|
this._closeController = new AbortController();
|
|
1726
|
+
this._closeController.signal.addEventListener("abort", () => {
|
|
1727
|
+
for (const [_peer, state] of this._replicationInfoRequestByPeer) {
|
|
1728
|
+
if (state.timer)
|
|
1729
|
+
clearTimeout(state.timer);
|
|
1730
|
+
}
|
|
1731
|
+
this._replicationInfoRequestByPeer.clear();
|
|
1732
|
+
});
|
|
1301
1733
|
this._isTrustedReplicator = options?.canReplicate;
|
|
1302
1734
|
this.keep = options?.keep;
|
|
1303
1735
|
this.pendingMaturity = new Map();
|
|
1304
1736
|
const id = sha256Base64Sync(this.log.id);
|
|
1305
1737
|
const storage = await this.node.storage.sublevel(id);
|
|
1306
1738
|
const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
|
|
1739
|
+
const fanoutService = this.node.services.fanout;
|
|
1740
|
+
const blockProviderNamespace = (cid) => `cid:${cid}`;
|
|
1307
1741
|
this.remoteBlocks = new RemoteBlocks({
|
|
1308
1742
|
local: localBlocks,
|
|
1309
|
-
publish: (message, options) => this.rpc.send(new BlocksMessage(message), options
|
|
1743
|
+
publish: (message, options) => this.rpc.send(new BlocksMessage(message), options),
|
|
1310
1744
|
waitFor: this.rpc.waitFor.bind(this.rpc),
|
|
1311
1745
|
publicKey: this.node.identity.publicKey,
|
|
1312
1746
|
eagerBlocks: options?.eagerBlocks ?? true,
|
|
1747
|
+
resolveProviders: async (cid, opts) => {
|
|
1748
|
+
// 1) tracker-backed provider directory (best-effort, bounded)
|
|
1749
|
+
try {
|
|
1750
|
+
const providers = await fanoutService?.queryProviders(blockProviderNamespace(cid), {
|
|
1751
|
+
want: 8,
|
|
1752
|
+
timeoutMs: 2_000,
|
|
1753
|
+
queryTimeoutMs: 500,
|
|
1754
|
+
bootstrapMaxPeers: 2,
|
|
1755
|
+
signal: opts?.signal,
|
|
1756
|
+
});
|
|
1757
|
+
if (providers && providers.length > 0)
|
|
1758
|
+
return providers;
|
|
1759
|
+
}
|
|
1760
|
+
catch {
|
|
1761
|
+
// ignore discovery failures
|
|
1762
|
+
}
|
|
1763
|
+
// 2) fallback to currently connected RPC peers
|
|
1764
|
+
const self = this.node.identity.publicKey.hashcode();
|
|
1765
|
+
const out = [];
|
|
1766
|
+
const peers = this.rpc?.peers;
|
|
1767
|
+
for (const h of peers?.keys?.() ?? []) {
|
|
1768
|
+
if (h === self)
|
|
1769
|
+
continue;
|
|
1770
|
+
if (out.includes(h))
|
|
1771
|
+
continue;
|
|
1772
|
+
out.push(h);
|
|
1773
|
+
if (out.length >= 32)
|
|
1774
|
+
break;
|
|
1775
|
+
}
|
|
1776
|
+
return out;
|
|
1777
|
+
},
|
|
1778
|
+
onPut: async (cid) => {
|
|
1779
|
+
// Best-effort directory announce for "get without remote.from" workflows.
|
|
1780
|
+
try {
|
|
1781
|
+
await fanoutService?.announceProvider(blockProviderNamespace(cid), {
|
|
1782
|
+
ttlMs: 120_000,
|
|
1783
|
+
bootstrapMaxPeers: 2,
|
|
1784
|
+
});
|
|
1785
|
+
}
|
|
1786
|
+
catch {
|
|
1787
|
+
// ignore announce failures
|
|
1788
|
+
}
|
|
1789
|
+
},
|
|
1313
1790
|
});
|
|
1314
1791
|
await this.remoteBlocks.start();
|
|
1315
1792
|
const logScope = await this.node.indexer.scope(id);
|
|
@@ -1332,6 +1809,7 @@ let SharedLog = (() => {
|
|
|
1332
1809
|
this._gidPeersHistory = new Map();
|
|
1333
1810
|
this._requestIPruneSent = new Map();
|
|
1334
1811
|
this._requestIPruneResponseReplicatorSet = new Map();
|
|
1812
|
+
this._checkedPruneRetries = new Map();
|
|
1335
1813
|
this.replicationChangeDebounceFn = debounceAggregationChanges((change) => this.onReplicationChange(change).then(() => this.rebalanceParticipationDebounced?.call()), this.distributionDebounceTime);
|
|
1336
1814
|
this.pruneDebouncedFn = debouncedAccumulatorMap((map) => {
|
|
1337
1815
|
this.prune(map);
|
|
@@ -1389,6 +1867,83 @@ let SharedLog = (() => {
|
|
|
1389
1867
|
}, PRUNE_DEBOUNCE_INTERVAL);
|
|
1390
1868
|
await this.log.open(this.remoteBlocks, this.node.identity, {
|
|
1391
1869
|
keychain: this.node.services.keychain,
|
|
1870
|
+
resolveRemotePeers: async (hash, options) => {
|
|
1871
|
+
if (options?.signal?.aborted)
|
|
1872
|
+
return undefined;
|
|
1873
|
+
const maxPeers = 8;
|
|
1874
|
+
const self = this.node.identity.publicKey.hashcode();
|
|
1875
|
+
const seed = hashToSeed32(hash);
|
|
1876
|
+
// Best hint: peers that have recently confirmed having this entry hash.
|
|
1877
|
+
const hinted = this._requestIPruneResponseReplicatorSet.get(hash);
|
|
1878
|
+
if (hinted && hinted.size > 0) {
|
|
1879
|
+
const peers = [...hinted].filter((p) => p !== self);
|
|
1880
|
+
return peers.length > 0
|
|
1881
|
+
? pickDeterministicSubset(peers, seed, maxPeers)
|
|
1882
|
+
: undefined;
|
|
1883
|
+
}
|
|
1884
|
+
// Next: peers we already contacted about this hash (may still have it).
|
|
1885
|
+
const contacted = this._requestIPruneSent.get(hash);
|
|
1886
|
+
if (contacted && contacted.size > 0) {
|
|
1887
|
+
const peers = [...contacted].filter((p) => p !== self);
|
|
1888
|
+
return peers.length > 0
|
|
1889
|
+
? pickDeterministicSubset(peers, seed, maxPeers)
|
|
1890
|
+
: undefined;
|
|
1891
|
+
}
|
|
1892
|
+
let candidates;
|
|
1893
|
+
// Prefer the replicator cache; fall back to subscribers if we have no other signal.
|
|
1894
|
+
const replicatorCandidates = [...this.uniqueReplicators].filter((p) => p !== self);
|
|
1895
|
+
if (replicatorCandidates.length > 0) {
|
|
1896
|
+
candidates = replicatorCandidates;
|
|
1897
|
+
}
|
|
1898
|
+
else {
|
|
1899
|
+
try {
|
|
1900
|
+
const subscribers = await this._getTopicSubscribers(this.topic);
|
|
1901
|
+
const subscriberCandidates = subscribers?.map((k) => k.hashcode()).filter((p) => p !== self) ??
|
|
1902
|
+
[];
|
|
1903
|
+
candidates =
|
|
1904
|
+
subscriberCandidates.length > 0 ? subscriberCandidates : undefined;
|
|
1905
|
+
}
|
|
1906
|
+
catch {
|
|
1907
|
+
// Best-effort only.
|
|
1908
|
+
}
|
|
1909
|
+
if (!candidates || candidates.length === 0) {
|
|
1910
|
+
// Last resort: peers we are already directly connected to. This avoids
|
|
1911
|
+
// depending on global membership knowledge in early-join scenarios.
|
|
1912
|
+
const peerMap = this.node.services.pubsub?.peers;
|
|
1913
|
+
if (peerMap?.keys) {
|
|
1914
|
+
candidates = [...peerMap.keys()];
|
|
1915
|
+
}
|
|
1916
|
+
}
|
|
1917
|
+
if (!candidates || candidates.length === 0) {
|
|
1918
|
+
// Even if the pubsub stream has no established peer streams yet, we may
|
|
1919
|
+
// still have a libp2p connection to one or more peers (e.g. bootstrap).
|
|
1920
|
+
const connectionManager = this.node.services.pubsub?.components
|
|
1921
|
+
?.connectionManager;
|
|
1922
|
+
const connections = connectionManager?.getConnections?.() ?? [];
|
|
1923
|
+
const connectionHashes = [];
|
|
1924
|
+
for (const conn of connections) {
|
|
1925
|
+
const peerId = conn?.remotePeer;
|
|
1926
|
+
if (!peerId)
|
|
1927
|
+
continue;
|
|
1928
|
+
try {
|
|
1929
|
+
connectionHashes.push(getPublicKeyFromPeerId(peerId).hashcode());
|
|
1930
|
+
}
|
|
1931
|
+
catch {
|
|
1932
|
+
// Best-effort only.
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
if (connectionHashes.length > 0) {
|
|
1936
|
+
candidates = connectionHashes;
|
|
1937
|
+
}
|
|
1938
|
+
}
|
|
1939
|
+
}
|
|
1940
|
+
if (!candidates || candidates.length === 0)
|
|
1941
|
+
return undefined;
|
|
1942
|
+
const peers = candidates.filter((p) => p !== self);
|
|
1943
|
+
if (peers.length === 0)
|
|
1944
|
+
return undefined;
|
|
1945
|
+
return pickDeterministicSubset(peers, seed, maxPeers);
|
|
1946
|
+
},
|
|
1392
1947
|
...this._logProperties,
|
|
1393
1948
|
onChange: async (change) => {
|
|
1394
1949
|
await this.onChange(change);
|
|
@@ -1456,6 +2011,7 @@ let SharedLog = (() => {
|
|
|
1456
2011
|
this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
|
|
1457
2012
|
await this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn);
|
|
1458
2013
|
await this.rpc.subscribe();
|
|
2014
|
+
await this._openFanoutChannel(options?.fanout);
|
|
1459
2015
|
// mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
|
|
1460
2016
|
await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
|
|
1461
2017
|
// if we had a previous session with replication info, and new replication info dictates that we unreplicate
|
|
@@ -1518,7 +2074,7 @@ let SharedLog = (() => {
|
|
|
1518
2074
|
});
|
|
1519
2075
|
await this.rebalanceParticipation();
|
|
1520
2076
|
// Take into account existing subscription
|
|
1521
|
-
(await this.
|
|
2077
|
+
(await this._getTopicSubscribers(this.topic))?.forEach((v) => {
|
|
1522
2078
|
if (v.equals(this.node.identity.publicKey)) {
|
|
1523
2079
|
return;
|
|
1524
2080
|
}
|
|
@@ -1551,18 +2107,22 @@ let SharedLog = (() => {
|
|
|
1551
2107
|
})
|
|
1552
2108
|
.then(async () => {
|
|
1553
2109
|
// is reachable, announce change events
|
|
1554
|
-
const key = await this.
|
|
2110
|
+
const key = await this._resolvePublicKeyFromHash(segment.value.hash);
|
|
1555
2111
|
if (!key) {
|
|
1556
2112
|
throw new Error("Failed to resolve public key from hash: " +
|
|
1557
2113
|
segment.value.hash);
|
|
1558
2114
|
}
|
|
1559
|
-
|
|
1560
|
-
this.
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
2115
|
+
const keyHash = key.hashcode();
|
|
2116
|
+
this.uniqueReplicators.add(keyHash);
|
|
2117
|
+
if (!this._replicatorJoinEmitted.has(keyHash)) {
|
|
2118
|
+
this._replicatorJoinEmitted.add(keyHash);
|
|
2119
|
+
this.events.dispatchEvent(new CustomEvent("replicator:join", {
|
|
2120
|
+
detail: { publicKey: key },
|
|
2121
|
+
}));
|
|
2122
|
+
this.events.dispatchEvent(new CustomEvent("replication:change", {
|
|
2123
|
+
detail: { publicKey: key },
|
|
2124
|
+
}));
|
|
2125
|
+
}
|
|
1566
2126
|
})
|
|
1567
2127
|
.catch(async (e) => {
|
|
1568
2128
|
if (isNotStartedError(e)) {
|
|
@@ -1672,26 +2232,33 @@ let SharedLog = (() => {
|
|
|
1672
2232
|
for (const [key, _] of this.syncronizer.syncInFlight) {
|
|
1673
2233
|
set.add(key);
|
|
1674
2234
|
}
|
|
2235
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
1675
2236
|
if (options?.reachableOnly) {
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
//
|
|
1679
|
-
//
|
|
1680
|
-
|
|
1681
|
-
|
|
2237
|
+
const directPeers = this.node.services
|
|
2238
|
+
.pubsub?.peers;
|
|
2239
|
+
// Prefer the live pubsub subscriber set when filtering reachability. In some
|
|
2240
|
+
// flows peers can be reachable/active even before (or without) subscriber
|
|
2241
|
+
// state converging, so also consider direct pubsub peers.
|
|
2242
|
+
const subscribers = (await this._getTopicSubscribers(this.topic)) ?? undefined;
|
|
1682
2243
|
const subscriberHashcodes = subscribers
|
|
1683
2244
|
? new Set(subscribers.map((key) => key.hashcode()))
|
|
1684
2245
|
: undefined;
|
|
2246
|
+
// If reachability is requested but we have no basis for filtering yet
|
|
2247
|
+
// (subscriber snapshot hasn't converged), return the full cover set.
|
|
2248
|
+
// Otherwise, only keep peers we can currently reach.
|
|
2249
|
+
const canFilter = directPeers != null ||
|
|
2250
|
+
(subscriberHashcodes && subscriberHashcodes.size > 0);
|
|
2251
|
+
if (!canFilter) {
|
|
2252
|
+
return [...set];
|
|
2253
|
+
}
|
|
1685
2254
|
const reachable = [];
|
|
1686
|
-
const selfHash = this.node.identity.publicKey.hashcode();
|
|
1687
2255
|
for (const peer of set) {
|
|
1688
2256
|
if (peer === selfHash) {
|
|
1689
2257
|
reachable.push(peer);
|
|
1690
2258
|
continue;
|
|
1691
2259
|
}
|
|
1692
|
-
if (subscriberHashcodes
|
|
1693
|
-
|
|
1694
|
-
: this.uniqueReplicators.has(peer)) {
|
|
2260
|
+
if ((subscriberHashcodes && subscriberHashcodes.has(peer)) ||
|
|
2261
|
+
(directPeers && directPeers.has(peer))) {
|
|
1695
2262
|
reachable.push(peer);
|
|
1696
2263
|
}
|
|
1697
2264
|
}
|
|
@@ -1716,6 +2283,14 @@ let SharedLog = (() => {
|
|
|
1716
2283
|
}
|
|
1717
2284
|
this.pendingMaturity.clear();
|
|
1718
2285
|
this.distributeQueue?.clear();
|
|
2286
|
+
this._closeFanoutChannel();
|
|
2287
|
+
try {
|
|
2288
|
+
this._providerHandle?.close();
|
|
2289
|
+
}
|
|
2290
|
+
catch {
|
|
2291
|
+
// ignore
|
|
2292
|
+
}
|
|
2293
|
+
this._providerHandle = undefined;
|
|
1719
2294
|
this.coordinateToHash.clear();
|
|
1720
2295
|
this.recentlyRebalanced.clear();
|
|
1721
2296
|
this.uniqueReplicators.clear();
|
|
@@ -1730,13 +2305,24 @@ let SharedLog = (() => {
|
|
|
1730
2305
|
for (const [_k, v] of this._pendingIHave) {
|
|
1731
2306
|
v.clear();
|
|
1732
2307
|
}
|
|
2308
|
+
for (const [_k, v] of this._checkedPruneRetries) {
|
|
2309
|
+
if (v.timer)
|
|
2310
|
+
clearTimeout(v.timer);
|
|
2311
|
+
}
|
|
1733
2312
|
await this.remoteBlocks.stop();
|
|
1734
2313
|
this._pendingDeletes.clear();
|
|
1735
2314
|
this._pendingIHave.clear();
|
|
2315
|
+
this._checkedPruneRetries.clear();
|
|
1736
2316
|
this.latestReplicationInfoMessage.clear();
|
|
1737
2317
|
this._gidPeersHistory.clear();
|
|
1738
2318
|
this._requestIPruneSent.clear();
|
|
1739
2319
|
this._requestIPruneResponseReplicatorSet.clear();
|
|
2320
|
+
// Cancel any pending debounced timers so they can't fire after we've torn down
|
|
2321
|
+
// indexes/RPC state.
|
|
2322
|
+
this.rebalanceParticipationDebounced?.close();
|
|
2323
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
2324
|
+
this.pruneDebouncedFn?.close?.();
|
|
2325
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
1740
2326
|
this.pruneDebouncedFn = undefined;
|
|
1741
2327
|
this.rebalanceParticipationDebounced = undefined;
|
|
1742
2328
|
this._replicationRangeIndex.stop();
|
|
@@ -1747,6 +2333,53 @@ let SharedLog = (() => {
|
|
|
1747
2333
|
/* this._totalParticipation = 0; */
|
|
1748
2334
|
}
|
|
1749
2335
|
async close(from) {
|
|
2336
|
+
// Best-effort: announce that we are going offline before tearing down
|
|
2337
|
+
// RPC/subscription state.
|
|
2338
|
+
//
|
|
2339
|
+
// Important: do not delete our local replication ranges here. Keeping them
|
|
2340
|
+
// allows `replicate: { type: "resume" }` to restore the previous role on
|
|
2341
|
+
// restart. Explicit `unreplicate()` still clears local state.
|
|
2342
|
+
try {
|
|
2343
|
+
if (!this.closed) {
|
|
2344
|
+
// Prevent any late debounced timers (rebalance/prune) from publishing
|
|
2345
|
+
// replication info after we announce "segments: []". These races can leave
|
|
2346
|
+
// stale segments on remotes after rapid open/close cycles.
|
|
2347
|
+
this._isReplicating = false;
|
|
2348
|
+
this._isAdaptiveReplicating = false;
|
|
2349
|
+
this.rebalanceParticipationDebounced?.close();
|
|
2350
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
2351
|
+
this.pruneDebouncedFn?.close?.();
|
|
2352
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
2353
|
+
// Ensure the "I'm leaving" replication reset is actually published before
|
|
2354
|
+
// the RPC child program closes and unsubscribes from its topic. If we fire
|
|
2355
|
+
// and forget here, the publish can race with `super.close()` and get dropped,
|
|
2356
|
+
// leaving stale replication segments on remotes (flaky join/leave tests).
|
|
2357
|
+
// Also ensure close is bounded even when shard overlays are mid-reconcile.
|
|
2358
|
+
const abort = new AbortController();
|
|
2359
|
+
const abortTimer = setTimeout(() => {
|
|
2360
|
+
try {
|
|
2361
|
+
abort.abort(new TimeoutError("shared-log close replication reset timed out"));
|
|
2362
|
+
}
|
|
2363
|
+
catch {
|
|
2364
|
+
abort.abort();
|
|
2365
|
+
}
|
|
2366
|
+
}, 2_000);
|
|
2367
|
+
try {
|
|
2368
|
+
await this.rpc
|
|
2369
|
+
.send(new AllReplicatingSegmentsMessage({ segments: [] }), {
|
|
2370
|
+
priority: 1,
|
|
2371
|
+
signal: abort.signal,
|
|
2372
|
+
})
|
|
2373
|
+
.catch(() => { });
|
|
2374
|
+
}
|
|
2375
|
+
finally {
|
|
2376
|
+
clearTimeout(abortTimer);
|
|
2377
|
+
}
|
|
2378
|
+
}
|
|
2379
|
+
}
|
|
2380
|
+
catch {
|
|
2381
|
+
// ignore: close should be resilient even if we were never fully started
|
|
2382
|
+
}
|
|
1750
2383
|
const superClosed = await super.close(from);
|
|
1751
2384
|
if (!superClosed) {
|
|
1752
2385
|
return superClosed;
|
|
@@ -1756,6 +2389,41 @@ let SharedLog = (() => {
|
|
|
1756
2389
|
return true;
|
|
1757
2390
|
}
|
|
1758
2391
|
async drop(from) {
|
|
2392
|
+
// Best-effort: announce that we are going offline before tearing down
|
|
2393
|
+
// RPC/subscription state (same reasoning as in `close()`).
|
|
2394
|
+
try {
|
|
2395
|
+
if (!this.closed) {
|
|
2396
|
+
this._isReplicating = false;
|
|
2397
|
+
this._isAdaptiveReplicating = false;
|
|
2398
|
+
this.rebalanceParticipationDebounced?.close();
|
|
2399
|
+
this.replicationChangeDebounceFn?.close?.();
|
|
2400
|
+
this.pruneDebouncedFn?.close?.();
|
|
2401
|
+
this.responseToPruneDebouncedFn?.close?.();
|
|
2402
|
+
const abort = new AbortController();
|
|
2403
|
+
const abortTimer = setTimeout(() => {
|
|
2404
|
+
try {
|
|
2405
|
+
abort.abort(new TimeoutError("shared-log drop replication reset timed out"));
|
|
2406
|
+
}
|
|
2407
|
+
catch {
|
|
2408
|
+
abort.abort();
|
|
2409
|
+
}
|
|
2410
|
+
}, 2_000);
|
|
2411
|
+
try {
|
|
2412
|
+
await this.rpc
|
|
2413
|
+
.send(new AllReplicatingSegmentsMessage({ segments: [] }), {
|
|
2414
|
+
priority: 1,
|
|
2415
|
+
signal: abort.signal,
|
|
2416
|
+
})
|
|
2417
|
+
.catch(() => { });
|
|
2418
|
+
}
|
|
2419
|
+
finally {
|
|
2420
|
+
clearTimeout(abortTimer);
|
|
2421
|
+
}
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
catch {
|
|
2425
|
+
// ignore: drop should be resilient even if we were never fully started
|
|
2426
|
+
}
|
|
1759
2427
|
const superDropped = await super.drop(from);
|
|
1760
2428
|
if (!superDropped) {
|
|
1761
2429
|
return superDropped;
|
|
@@ -2041,7 +2709,7 @@ let SharedLog = (() => {
|
|
|
2041
2709
|
const segments = (await this.getMyReplicationSegments()).map((x) => x.toReplicationRange());
|
|
2042
2710
|
this.rpc
|
|
2043
2711
|
.send(new AllReplicatingSegmentsMessage({ segments }), {
|
|
2044
|
-
mode: new
|
|
2712
|
+
mode: new AcknowledgeDelivery({ to: [context.from], redundancy: 1 }),
|
|
2045
2713
|
})
|
|
2046
2714
|
.catch((e) => logger.error(e.toString()));
|
|
2047
2715
|
// for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
|
|
@@ -2072,33 +2740,55 @@ let SharedLog = (() => {
|
|
|
2072
2740
|
// `Program.waitFor()`. Dropping these messages can lead to missing replicator info
|
|
2073
2741
|
// (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
|
|
2074
2742
|
const from = context.from;
|
|
2743
|
+
const fromHash = from.hashcode();
|
|
2744
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
2745
|
+
return;
|
|
2746
|
+
}
|
|
2075
2747
|
const messageTimestamp = context.message.header.timestamp;
|
|
2076
|
-
(async () => {
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2748
|
+
await this.withReplicationInfoApplyQueue(fromHash, async () => {
|
|
2749
|
+
try {
|
|
2750
|
+
// The peer may have unsubscribed after this message was queued.
|
|
2751
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
2752
|
+
return;
|
|
2753
|
+
}
|
|
2754
|
+
// Process in-order to avoid races where repeated reset messages arrive
|
|
2755
|
+
// concurrently and trigger spurious "added" diffs / rebalancing.
|
|
2756
|
+
const prev = this.latestReplicationInfoMessage.get(fromHash);
|
|
2757
|
+
if (prev && prev > messageTimestamp) {
|
|
2758
|
+
return;
|
|
2759
|
+
}
|
|
2760
|
+
this.latestReplicationInfoMessage.set(fromHash, messageTimestamp);
|
|
2761
|
+
if (this.closed) {
|
|
2762
|
+
return;
|
|
2763
|
+
}
|
|
2764
|
+
const reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2765
|
+
await this.addReplicationRange(replicationInfoMessage.segments.map((x) => x.toReplicationRangeIndexable(from)), from, {
|
|
2766
|
+
reset,
|
|
2767
|
+
checkDuplicates: true,
|
|
2768
|
+
timestamp: Number(messageTimestamp),
|
|
2769
|
+
});
|
|
2770
|
+
// If the peer reports any replication segments, stop re-requesting.
|
|
2771
|
+
// (Empty reports can be transient during startup.)
|
|
2772
|
+
if (replicationInfoMessage.segments.length > 0) {
|
|
2773
|
+
this.cancelReplicationInfoRequests(fromHash);
|
|
2774
|
+
}
|
|
2084
2775
|
}
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
});
|
|
2091
|
-
})().catch((e) => {
|
|
2092
|
-
if (isNotStartedError(e)) {
|
|
2093
|
-
return;
|
|
2776
|
+
catch (e) {
|
|
2777
|
+
if (isNotStartedError(e)) {
|
|
2778
|
+
return;
|
|
2779
|
+
}
|
|
2780
|
+
logger.error(`Failed to apply replication settings from '${fromHash}': ${e?.message ?? e}`);
|
|
2094
2781
|
}
|
|
2095
|
-
logger.error(`Failed to apply replication settings from '${from.hashcode()}': ${e?.message ?? e}`);
|
|
2096
2782
|
});
|
|
2097
2783
|
}
|
|
2098
2784
|
else if (msg instanceof StoppedReplicating) {
|
|
2099
2785
|
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2100
2786
|
return;
|
|
2101
2787
|
}
|
|
2788
|
+
const fromHash = context.from.hashcode();
|
|
2789
|
+
if (this._replicationInfoBlockedPeers.has(fromHash)) {
|
|
2790
|
+
return;
|
|
2791
|
+
}
|
|
2102
2792
|
const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(msg.segmentIds, context.from);
|
|
2103
2793
|
await this.removeReplicationRanges(rangesToRemove, context.from);
|
|
2104
2794
|
const timestamp = BigInt(+new Date());
|
|
@@ -2363,12 +3053,17 @@ let SharedLog = (() => {
|
|
|
2363
3053
|
requestTimer = undefined;
|
|
2364
3054
|
}
|
|
2365
3055
|
};
|
|
2366
|
-
const resolve = () => {
|
|
3056
|
+
const resolve = async () => {
|
|
2367
3057
|
if (settled) {
|
|
2368
3058
|
return;
|
|
2369
3059
|
}
|
|
2370
3060
|
settled = true;
|
|
2371
3061
|
clear();
|
|
3062
|
+
// `waitForReplicator()` is typically used as a precondition before join/replicate
|
|
3063
|
+
// flows. A replicator can become mature and enqueue a debounced rebalance
|
|
3064
|
+
// (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
|
|
3065
|
+
// observe a "late" rebalance after the wait resolves.
|
|
3066
|
+
await this.replicationChangeDebounceFn?.flush?.();
|
|
2372
3067
|
deferred.resolve();
|
|
2373
3068
|
};
|
|
2374
3069
|
const reject = (error) => {
|
|
@@ -2400,13 +3095,14 @@ let SharedLog = (() => {
|
|
|
2400
3095
|
requestAttempts++;
|
|
2401
3096
|
this.rpc
|
|
2402
3097
|
.send(new RequestReplicationInfoMessage(), {
|
|
2403
|
-
mode: new
|
|
3098
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [key] }),
|
|
2404
3099
|
})
|
|
2405
3100
|
.catch((e) => {
|
|
2406
3101
|
// Best-effort: missing peers / unopened RPC should not fail the wait logic.
|
|
2407
3102
|
if (isNotStartedError(e)) {
|
|
2408
3103
|
return;
|
|
2409
3104
|
}
|
|
3105
|
+
logger.error(e?.toString?.() ?? String(e));
|
|
2410
3106
|
});
|
|
2411
3107
|
if (requestAttempts < maxRequestAttempts) {
|
|
2412
3108
|
requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
|
|
@@ -2425,7 +3121,7 @@ let SharedLog = (() => {
|
|
|
2425
3121
|
return;
|
|
2426
3122
|
}
|
|
2427
3123
|
}
|
|
2428
|
-
resolve();
|
|
3124
|
+
await resolve();
|
|
2429
3125
|
}
|
|
2430
3126
|
catch (error) {
|
|
2431
3127
|
reject(error instanceof Error ? error : new Error(String(error)));
|
|
@@ -2441,48 +3137,68 @@ let SharedLog = (() => {
|
|
|
2441
3137
|
return deferred.promise.finally(clear);
|
|
2442
3138
|
}
|
|
2443
3139
|
async waitForReplicators(options) {
|
|
2444
|
-
// if no remotes, just return
|
|
2445
|
-
const subscribers = await this.node.services.pubsub.getSubscribers(this.rpc.topic);
|
|
2446
|
-
let waitForNewPeers = options?.waitForNewPeers;
|
|
2447
|
-
if (!waitForNewPeers && (subscribers?.length ?? 0) === 0) {
|
|
2448
|
-
throw new NoPeersError(this.rpc.topic);
|
|
2449
|
-
}
|
|
2450
3140
|
let coverageThreshold = options?.coverageThreshold ?? 1;
|
|
2451
3141
|
let deferred = pDefer();
|
|
3142
|
+
let settled = false;
|
|
2452
3143
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge());
|
|
2453
3144
|
const providedCustomRoleAge = options?.roleAge != null;
|
|
2454
|
-
|
|
3145
|
+
const resolve = () => {
|
|
3146
|
+
if (settled)
|
|
3147
|
+
return;
|
|
3148
|
+
settled = true;
|
|
3149
|
+
deferred.resolve();
|
|
3150
|
+
};
|
|
3151
|
+
const reject = (error) => {
|
|
3152
|
+
if (settled)
|
|
3153
|
+
return;
|
|
3154
|
+
settled = true;
|
|
3155
|
+
deferred.reject(error);
|
|
3156
|
+
};
|
|
3157
|
+
let checkInFlight;
|
|
3158
|
+
const checkCoverage = async () => {
|
|
2455
3159
|
const coverage = await this.calculateCoverage({
|
|
2456
3160
|
roleAge,
|
|
2457
3161
|
});
|
|
2458
3162
|
if (coverage >= coverageThreshold) {
|
|
2459
|
-
|
|
3163
|
+
resolve();
|
|
2460
3164
|
return true;
|
|
2461
3165
|
}
|
|
2462
3166
|
return false;
|
|
2463
3167
|
};
|
|
3168
|
+
const scheduleCheckCoverage = () => {
|
|
3169
|
+
if (settled || checkInFlight) {
|
|
3170
|
+
return;
|
|
3171
|
+
}
|
|
3172
|
+
checkInFlight = checkCoverage()
|
|
3173
|
+
.then(() => { })
|
|
3174
|
+
.catch(reject)
|
|
3175
|
+
.finally(() => {
|
|
3176
|
+
checkInFlight = undefined;
|
|
3177
|
+
});
|
|
3178
|
+
};
|
|
2464
3179
|
const onReplicatorMature = () => {
|
|
2465
|
-
|
|
3180
|
+
scheduleCheckCoverage();
|
|
2466
3181
|
};
|
|
2467
3182
|
const onReplicationChange = () => {
|
|
2468
|
-
|
|
3183
|
+
scheduleCheckCoverage();
|
|
2469
3184
|
};
|
|
2470
3185
|
this.events.addEventListener("replicator:mature", onReplicatorMature);
|
|
2471
3186
|
this.events.addEventListener("replication:change", onReplicationChange);
|
|
2472
|
-
await checkCoverage();
|
|
2473
|
-
let
|
|
3187
|
+
await checkCoverage().catch(reject);
|
|
3188
|
+
let intervalMs = providedCustomRoleAge ? 100 : 250;
|
|
3189
|
+
let interval = roleAge > 0
|
|
2474
3190
|
? setInterval(() => {
|
|
2475
|
-
|
|
2476
|
-
},
|
|
3191
|
+
scheduleCheckCoverage();
|
|
3192
|
+
}, intervalMs)
|
|
2477
3193
|
: undefined;
|
|
2478
3194
|
let timeout = options?.timeout ?? this.waitForReplicatorTimeout;
|
|
2479
3195
|
const timer = setTimeout(() => {
|
|
2480
3196
|
clear();
|
|
2481
|
-
|
|
3197
|
+
reject(new TimeoutError(`Timeout waiting for mature replicators`));
|
|
2482
3198
|
}, timeout);
|
|
2483
3199
|
const abortListener = () => {
|
|
2484
3200
|
clear();
|
|
2485
|
-
|
|
3201
|
+
reject(new AbortError());
|
|
2486
3202
|
};
|
|
2487
3203
|
if (options?.signal) {
|
|
2488
3204
|
options.signal.addEventListener("abort", abortListener);
|
|
@@ -2602,9 +3318,7 @@ let SharedLog = (() => {
|
|
|
2602
3318
|
let subscribers = 1;
|
|
2603
3319
|
if (!this.rpc.closed) {
|
|
2604
3320
|
try {
|
|
2605
|
-
subscribers =
|
|
2606
|
-
(await this.node.services.pubsub.getSubscribers(this.rpc.topic))
|
|
2607
|
-
?.length ?? 1;
|
|
3321
|
+
subscribers = (await this._getTopicSubscribers(this.rpc.topic))?.length ?? 1;
|
|
2608
3322
|
}
|
|
2609
3323
|
catch {
|
|
2610
3324
|
// Best-effort only; fall back to 1.
|
|
@@ -2671,22 +3385,31 @@ let SharedLog = (() => {
|
|
|
2671
3385
|
async _findLeaders(cursors, options) {
|
|
2672
3386
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
|
|
2673
3387
|
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2674
|
-
//
|
|
2675
|
-
//
|
|
2676
|
-
//
|
|
3388
|
+
// Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
|
|
3389
|
+
// Falling back to live pubsub subscribers can include non-replicators and can
|
|
3390
|
+
// break delivery/availability when writers are not directly connected.
|
|
2677
3391
|
let peerFilter = undefined;
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
3392
|
+
const selfReplicating = await this.isReplicating();
|
|
3393
|
+
if (this.uniqueReplicators.size > 0) {
|
|
3394
|
+
peerFilter = new Set(this.uniqueReplicators);
|
|
3395
|
+
if (selfReplicating) {
|
|
3396
|
+
peerFilter.add(selfHash);
|
|
3397
|
+
}
|
|
3398
|
+
else {
|
|
3399
|
+
peerFilter.delete(selfHash);
|
|
3400
|
+
}
|
|
2682
3401
|
}
|
|
2683
3402
|
else {
|
|
2684
3403
|
try {
|
|
2685
|
-
const subscribers = (await this.
|
|
2686
|
-
undefined;
|
|
3404
|
+
const subscribers = (await this._getTopicSubscribers(this.topic)) ?? undefined;
|
|
2687
3405
|
if (subscribers && subscribers.length > 0) {
|
|
2688
3406
|
peerFilter = new Set(subscribers.map((key) => key.hashcode()));
|
|
2689
|
-
|
|
3407
|
+
if (selfReplicating) {
|
|
3408
|
+
peerFilter.add(selfHash);
|
|
3409
|
+
}
|
|
3410
|
+
else {
|
|
3411
|
+
peerFilter.delete(selfHash);
|
|
3412
|
+
}
|
|
2690
3413
|
}
|
|
2691
3414
|
}
|
|
2692
3415
|
catch {
|
|
@@ -2709,28 +3432,99 @@ let SharedLog = (() => {
|
|
|
2709
3432
|
replicas: maxReplicas(this, [entry]),
|
|
2710
3433
|
}, options);
|
|
2711
3434
|
}
|
|
3435
|
+
withReplicationInfoApplyQueue(peerHash, fn) {
|
|
3436
|
+
const prev = this._replicationInfoApplyQueueByPeer.get(peerHash);
|
|
3437
|
+
const next = (prev ?? Promise.resolve())
|
|
3438
|
+
.catch(() => {
|
|
3439
|
+
// Avoid stuck queues if a previous apply failed.
|
|
3440
|
+
})
|
|
3441
|
+
.then(fn);
|
|
3442
|
+
this._replicationInfoApplyQueueByPeer.set(peerHash, next);
|
|
3443
|
+
return next.finally(() => {
|
|
3444
|
+
if (this._replicationInfoApplyQueueByPeer.get(peerHash) === next) {
|
|
3445
|
+
this._replicationInfoApplyQueueByPeer.delete(peerHash);
|
|
3446
|
+
}
|
|
3447
|
+
});
|
|
3448
|
+
}
|
|
3449
|
+
cancelReplicationInfoRequests(peerHash) {
|
|
3450
|
+
const state = this._replicationInfoRequestByPeer.get(peerHash);
|
|
3451
|
+
if (!state)
|
|
3452
|
+
return;
|
|
3453
|
+
if (state.timer) {
|
|
3454
|
+
clearTimeout(state.timer);
|
|
3455
|
+
}
|
|
3456
|
+
this._replicationInfoRequestByPeer.delete(peerHash);
|
|
3457
|
+
}
|
|
3458
|
+
scheduleReplicationInfoRequests(peer) {
|
|
3459
|
+
const peerHash = peer.hashcode();
|
|
3460
|
+
if (this._replicationInfoRequestByPeer.has(peerHash)) {
|
|
3461
|
+
return;
|
|
3462
|
+
}
|
|
3463
|
+
const state = {
|
|
3464
|
+
attempts: 0,
|
|
3465
|
+
};
|
|
3466
|
+
this._replicationInfoRequestByPeer.set(peerHash, state);
|
|
3467
|
+
const intervalMs = Math.max(50, this.waitForReplicatorRequestIntervalMs);
|
|
3468
|
+
const maxAttempts = Math.min(5, this.waitForReplicatorRequestMaxAttempts ??
|
|
3469
|
+
WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS);
|
|
3470
|
+
const tick = () => {
|
|
3471
|
+
if (this.closed || this._closeController.signal.aborted) {
|
|
3472
|
+
this.cancelReplicationInfoRequests(peerHash);
|
|
3473
|
+
return;
|
|
3474
|
+
}
|
|
3475
|
+
state.attempts++;
|
|
3476
|
+
this.rpc
|
|
3477
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3478
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [peer] }),
|
|
3479
|
+
})
|
|
3480
|
+
.catch((e) => {
|
|
3481
|
+
// Best-effort: missing peers / unopened RPC should not fail join flows.
|
|
3482
|
+
if (isNotStartedError(e)) {
|
|
3483
|
+
return;
|
|
3484
|
+
}
|
|
3485
|
+
logger.error(e?.toString?.() ?? String(e));
|
|
3486
|
+
});
|
|
3487
|
+
if (state.attempts >= maxAttempts) {
|
|
3488
|
+
this.cancelReplicationInfoRequests(peerHash);
|
|
3489
|
+
return;
|
|
3490
|
+
}
|
|
3491
|
+
state.timer = setTimeout(tick, intervalMs);
|
|
3492
|
+
state.timer.unref?.();
|
|
3493
|
+
};
|
|
3494
|
+
tick();
|
|
3495
|
+
}
|
|
2712
3496
|
async handleSubscriptionChange(publicKey, topics, subscribed) {
|
|
2713
3497
|
if (!topics.includes(this.topic)) {
|
|
2714
3498
|
return;
|
|
2715
3499
|
}
|
|
3500
|
+
const peerHash = publicKey.hashcode();
|
|
3501
|
+
if (subscribed) {
|
|
3502
|
+
this._replicationInfoBlockedPeers.delete(peerHash);
|
|
3503
|
+
}
|
|
3504
|
+
else {
|
|
3505
|
+
this._replicationInfoBlockedPeers.add(peerHash);
|
|
3506
|
+
}
|
|
2716
3507
|
if (!subscribed) {
|
|
2717
|
-
|
|
3508
|
+
// Emit replicator:leave at most once per (join -> leave) transition, even if we
|
|
3509
|
+
// concurrently process unsubscribe + replication reset messages for the same peer.
|
|
3510
|
+
const stoppedTransition = this.uniqueReplicators.delete(peerHash);
|
|
3511
|
+
this._replicatorJoinEmitted.delete(peerHash);
|
|
3512
|
+
this.cancelReplicationInfoRequests(peerHash);
|
|
3513
|
+
this.removePeerFromGidPeerHistory(peerHash);
|
|
2718
3514
|
for (const [k, v] of this._requestIPruneSent) {
|
|
2719
|
-
v.delete(
|
|
3515
|
+
v.delete(peerHash);
|
|
2720
3516
|
if (v.size === 0) {
|
|
2721
3517
|
this._requestIPruneSent.delete(k);
|
|
2722
3518
|
}
|
|
2723
3519
|
}
|
|
2724
3520
|
for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
|
|
2725
|
-
v.delete(
|
|
3521
|
+
v.delete(peerHash);
|
|
2726
3522
|
if (v.size === 0) {
|
|
2727
3523
|
this._requestIPruneResponseReplicatorSet.delete(k);
|
|
2728
3524
|
}
|
|
2729
3525
|
}
|
|
2730
3526
|
this.syncronizer.onPeerDisconnected(publicKey);
|
|
2731
|
-
|
|
2732
|
-
query: { hash: publicKey.hashcode() },
|
|
2733
|
-
})) > 0 &&
|
|
3527
|
+
stoppedTransition &&
|
|
2734
3528
|
this.events.dispatchEvent(new CustomEvent("replicator:leave", {
|
|
2735
3529
|
detail: { publicKey },
|
|
2736
3530
|
}));
|
|
@@ -2742,14 +3536,14 @@ let SharedLog = (() => {
|
|
|
2742
3536
|
.send(new AllReplicatingSegmentsMessage({
|
|
2743
3537
|
segments: replicationSegments.map((x) => x.toReplicationRange()),
|
|
2744
3538
|
}), {
|
|
2745
|
-
mode: new
|
|
3539
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
|
|
2746
3540
|
})
|
|
2747
3541
|
.catch((e) => logger.error(e.toString()));
|
|
2748
3542
|
if (this.v8Behaviour) {
|
|
2749
3543
|
// for backwards compatibility
|
|
2750
3544
|
this.rpc
|
|
2751
3545
|
.send(new ResponseRoleMessage({ role: await this.getRole() }), {
|
|
2752
|
-
mode: new
|
|
3546
|
+
mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
|
|
2753
3547
|
})
|
|
2754
3548
|
.catch((e) => logger.error(e.toString()));
|
|
2755
3549
|
}
|
|
@@ -2757,11 +3551,7 @@ let SharedLog = (() => {
|
|
|
2757
3551
|
// Request the remote peer's replication info. This makes joins resilient to
|
|
2758
3552
|
// timing-sensitive delivery/order issues where we may miss their initial
|
|
2759
3553
|
// replication announcement.
|
|
2760
|
-
this.
|
|
2761
|
-
.send(new RequestReplicationInfoMessage(), {
|
|
2762
|
-
mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
|
|
2763
|
-
})
|
|
2764
|
-
.catch((e) => logger.error(e.toString()));
|
|
3554
|
+
this.scheduleReplicationInfoRequests(publicKey);
|
|
2765
3555
|
}
|
|
2766
3556
|
else {
|
|
2767
3557
|
await this.removeReplicator(publicKey);
|
|
@@ -2815,6 +3605,7 @@ let SharedLog = (() => {
|
|
|
2815
3605
|
const promises = [];
|
|
2816
3606
|
let peerToEntries = new Map();
|
|
2817
3607
|
let cleanupTimer = [];
|
|
3608
|
+
const explicitTimeout = options?.timeout != null;
|
|
2818
3609
|
for (const { entry, leaders } of entries.values()) {
|
|
2819
3610
|
for (const leader of leaders.keys()) {
|
|
2820
3611
|
let set = peerToEntries.get(leader);
|
|
@@ -2826,7 +3617,28 @@ let SharedLog = (() => {
|
|
|
2826
3617
|
}
|
|
2827
3618
|
const pendingPrev = this._pendingDeletes.get(entry.hash);
|
|
2828
3619
|
if (pendingPrev) {
|
|
2829
|
-
|
|
3620
|
+
// If a background prune is already in-flight, an explicit prune request should
|
|
3621
|
+
// still respect the caller's timeout. Otherwise, tests (and user calls) can
|
|
3622
|
+
// block on the longer "checked prune" timeout derived from
|
|
3623
|
+
// `_respondToIHaveTimeout + waitForReplicatorTimeout`, which is intentionally
|
|
3624
|
+
// large for resiliency.
|
|
3625
|
+
if (explicitTimeout) {
|
|
3626
|
+
const timeoutMs = Math.max(0, Math.floor(options?.timeout ?? 0));
|
|
3627
|
+
promises.push(new Promise((resolve, reject) => {
|
|
3628
|
+
// Mirror the checked-prune error prefix so existing callers/tests can
|
|
3629
|
+
// match on the message substring.
|
|
3630
|
+
const timer = setTimeout(() => {
|
|
3631
|
+
reject(new Error(`Timeout for checked pruning after ${timeoutMs}ms (pending=true closed=${this.closed})`));
|
|
3632
|
+
}, timeoutMs);
|
|
3633
|
+
timer.unref?.();
|
|
3634
|
+
pendingPrev.promise.promise
|
|
3635
|
+
.then(resolve, reject)
|
|
3636
|
+
.finally(() => clearTimeout(timer));
|
|
3637
|
+
}));
|
|
3638
|
+
}
|
|
3639
|
+
else {
|
|
3640
|
+
promises.push(pendingPrev.promise.promise);
|
|
3641
|
+
}
|
|
2830
3642
|
continue;
|
|
2831
3643
|
}
|
|
2832
3644
|
const minReplicas = decodeReplicas(entry);
|
|
@@ -2840,6 +3652,7 @@ let SharedLog = (() => {
|
|
|
2840
3652
|
};
|
|
2841
3653
|
const resolve = () => {
|
|
2842
3654
|
clear();
|
|
3655
|
+
this.clearCheckedPruneRetry(entry.hash);
|
|
2843
3656
|
cleanupTimer.push(setTimeout(async () => {
|
|
2844
3657
|
this._gidPeersHistory.delete(entry.meta.gid);
|
|
2845
3658
|
this.removePruneRequestSent(entry.hash);
|
|
@@ -2877,6 +3690,12 @@ let SharedLog = (() => {
|
|
|
2877
3690
|
};
|
|
2878
3691
|
const reject = (e) => {
|
|
2879
3692
|
clear();
|
|
3693
|
+
const isCheckedPruneTimeout = e instanceof Error &&
|
|
3694
|
+
typeof e.message === "string" &&
|
|
3695
|
+
e.message.startsWith("Timeout for checked pruning");
|
|
3696
|
+
if (explicitTimeout || !isCheckedPruneTimeout) {
|
|
3697
|
+
this.clearCheckedPruneRetry(entry.hash);
|
|
3698
|
+
}
|
|
2880
3699
|
this.removePruneRequestSent(entry.hash);
|
|
2881
3700
|
this._requestIPruneResponseReplicatorSet.delete(entry.hash);
|
|
2882
3701
|
deferredPromise.reject(e);
|
|
@@ -2892,6 +3711,12 @@ let SharedLog = (() => {
|
|
|
2892
3711
|
this.waitForReplicatorTimeout +
|
|
2893
3712
|
PRUNE_DEBOUNCE_INTERVAL * 2);
|
|
2894
3713
|
const timeout = setTimeout(() => {
|
|
3714
|
+
// For internal/background prune flows (no explicit timeout), retry a few times
|
|
3715
|
+
// to avoid "permanently prunable" entries when `_pendingIHave` expires under
|
|
3716
|
+
// heavy load.
|
|
3717
|
+
if (!explicitTimeout) {
|
|
3718
|
+
this.scheduleCheckedPruneRetry({ entry, leaders });
|
|
3719
|
+
}
|
|
2895
3720
|
reject(new Error(`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`));
|
|
2896
3721
|
}, checkedPruneTimeoutMs);
|
|
2897
3722
|
timeout.unref?.();
|
|
@@ -2921,6 +3746,8 @@ let SharedLog = (() => {
|
|
|
2921
3746
|
this._requestIPruneResponseReplicatorSet.set(entry.hash, existCounter);
|
|
2922
3747
|
}
|
|
2923
3748
|
existCounter.add(publicKeyHash);
|
|
3749
|
+
// Seed provider hints so future remote reads can avoid extra round-trips.
|
|
3750
|
+
this.remoteBlocks.hintProviders(entry.hash, [publicKeyHash]);
|
|
2924
3751
|
if (minReplicasValue <= existCounter.size) {
|
|
2925
3752
|
resolve();
|
|
2926
3753
|
}
|
|
@@ -2958,6 +3785,37 @@ let SharedLog = (() => {
|
|
|
2958
3785
|
for (const [k, v] of peerToEntries) {
|
|
2959
3786
|
emitMessages(v, k);
|
|
2960
3787
|
}
|
|
3788
|
+
// Keep remote `_pendingIHave` alive in the common "leader doesn't have entry yet"
|
|
3789
|
+
// case. This is intentionally disabled when an explicit timeout is provided to
|
|
3790
|
+
// preserve unit tests that assert remote `_pendingIHave` clears promptly.
|
|
3791
|
+
if (!explicitTimeout && peerToEntries.size > 0) {
|
|
3792
|
+
const respondToIHaveTimeout = Number(this._respondToIHaveTimeout ?? 0);
|
|
3793
|
+
const resendIntervalMs = Math.min(CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS, Math.max(CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS, Math.floor(respondToIHaveTimeout / 2) || 1_000));
|
|
3794
|
+
let inFlight = false;
|
|
3795
|
+
const timer = setInterval(() => {
|
|
3796
|
+
if (inFlight)
|
|
3797
|
+
return;
|
|
3798
|
+
if (this.closed)
|
|
3799
|
+
return;
|
|
3800
|
+
const pendingByPeer = [];
|
|
3801
|
+
for (const [peer, hashes] of peerToEntries) {
|
|
3802
|
+
const pending = hashes.filter((h) => this._pendingDeletes.has(h));
|
|
3803
|
+
if (pending.length > 0) {
|
|
3804
|
+
pendingByPeer.push([peer, pending]);
|
|
3805
|
+
}
|
|
3806
|
+
}
|
|
3807
|
+
if (pendingByPeer.length === 0) {
|
|
3808
|
+
clearInterval(timer);
|
|
3809
|
+
return;
|
|
3810
|
+
}
|
|
3811
|
+
inFlight = true;
|
|
3812
|
+
Promise.allSettled(pendingByPeer.map(([peer, hashes]) => emitMessages(hashes, peer).catch(() => { }))).finally(() => {
|
|
3813
|
+
inFlight = false;
|
|
3814
|
+
});
|
|
3815
|
+
}, resendIntervalMs);
|
|
3816
|
+
timer.unref?.();
|
|
3817
|
+
cleanupTimer.push(timer);
|
|
3818
|
+
}
|
|
2961
3819
|
let cleanup = () => {
|
|
2962
3820
|
for (const timer of cleanupTimer) {
|
|
2963
3821
|
clearTimeout(timer);
|
|
@@ -3014,14 +3872,31 @@ let SharedLog = (() => {
|
|
|
3014
3872
|
return;
|
|
3015
3873
|
}
|
|
3016
3874
|
await this.log.trim();
|
|
3875
|
+
const batchedChanges = Array.isArray(changeOrChanges[0])
|
|
3876
|
+
? changeOrChanges
|
|
3877
|
+
: [changeOrChanges];
|
|
3878
|
+
const changes = batchedChanges.flat();
|
|
3879
|
+
// On removed ranges (peer leaves / shrink), gid-level history can hide
|
|
3880
|
+
// per-entry gaps. Force a fresh delivery pass for reassigned entries.
|
|
3881
|
+
const forceFreshDelivery = changes.some((change) => change.type === "removed");
|
|
3882
|
+
const gidPeersHistorySnapshot = new Map();
|
|
3017
3883
|
const changed = false;
|
|
3018
3884
|
try {
|
|
3019
3885
|
const uncheckedDeliver = new Map();
|
|
3020
|
-
for await (const entryReplicated of toRebalance(
|
|
3886
|
+
for await (const entryReplicated of toRebalance(changes, this.entryCoordinatesIndex, this.recentlyRebalanced)) {
|
|
3021
3887
|
if (this.closed) {
|
|
3022
3888
|
break;
|
|
3023
3889
|
}
|
|
3024
|
-
let oldPeersSet
|
|
3890
|
+
let oldPeersSet;
|
|
3891
|
+
if (!forceFreshDelivery) {
|
|
3892
|
+
const gid = entryReplicated.gid;
|
|
3893
|
+
oldPeersSet = gidPeersHistorySnapshot.get(gid);
|
|
3894
|
+
if (!gidPeersHistorySnapshot.has(gid)) {
|
|
3895
|
+
const existing = this._gidPeersHistory.get(gid);
|
|
3896
|
+
oldPeersSet = existing ? new Set(existing) : undefined;
|
|
3897
|
+
gidPeersHistorySnapshot.set(gid, oldPeersSet);
|
|
3898
|
+
}
|
|
3899
|
+
}
|
|
3025
3900
|
let isLeader = false;
|
|
3026
3901
|
let currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, {
|
|
3027
3902
|
// we do this to make sure new replicators get data even though they are not mature so they can figure out if they want to replicate more or less
|
|
@@ -3085,12 +3960,28 @@ let SharedLog = (() => {
|
|
|
3085
3960
|
}
|
|
3086
3961
|
async _onUnsubscription(evt) {
|
|
3087
3962
|
logger.trace(`Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(evt.detail.topics.map((x) => x))} '`);
|
|
3088
|
-
|
|
3963
|
+
if (!evt.detail.topics.includes(this.topic)) {
|
|
3964
|
+
return;
|
|
3965
|
+
}
|
|
3966
|
+
const fromHash = evt.detail.from.hashcode();
|
|
3967
|
+
this._replicationInfoBlockedPeers.add(fromHash);
|
|
3968
|
+
// Keep a per-peer timestamp watermark when we observe an unsubscribe. This
|
|
3969
|
+
// prevents late/out-of-order replication-info messages from re-introducing
|
|
3970
|
+
// stale segments for a peer that has already left the topic.
|
|
3971
|
+
const now = BigInt(+new Date());
|
|
3972
|
+
const prev = this.latestReplicationInfoMessage.get(fromHash);
|
|
3973
|
+
if (!prev || prev < now) {
|
|
3974
|
+
this.latestReplicationInfoMessage.set(fromHash, now);
|
|
3975
|
+
}
|
|
3089
3976
|
return this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, false);
|
|
3090
3977
|
}
|
|
3091
3978
|
async _onSubscription(evt) {
|
|
3092
3979
|
logger.trace(`New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(evt.detail.topics.map((x) => x))}'`);
|
|
3980
|
+
if (!evt.detail.topics.includes(this.topic)) {
|
|
3981
|
+
return;
|
|
3982
|
+
}
|
|
3093
3983
|
this.remoteBlocks.onReachable(evt.detail.from);
|
|
3984
|
+
this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
|
|
3094
3985
|
return this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, true);
|
|
3095
3986
|
}
|
|
3096
3987
|
async rebalanceParticipation() {
|