@peerbit/shared-log 12.2.0-62829ef → 12.2.0-6aaa5dd
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/pid-convergence.d.ts +2 -0
- package/dist/benchmark/pid-convergence.d.ts.map +1 -0
- package/dist/benchmark/pid-convergence.js +138 -0
- package/dist/benchmark/pid-convergence.js.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts +2 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js +104 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js.map +1 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.js +17 -1
- package/dist/benchmark/rateless-iblt-startsync-cache.js.map +1 -1
- package/dist/benchmark/sync-catchup.d.ts +3 -0
- package/dist/benchmark/sync-catchup.d.ts.map +1 -0
- package/dist/benchmark/sync-catchup.js +109 -0
- package/dist/benchmark/sync-catchup.js.map +1 -0
- package/dist/src/index.d.ts +8 -2
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +239 -82
- package/dist/src/index.js.map +1 -1
- package/dist/src/ranges.d.ts +1 -0
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +48 -18
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +41 -18
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/package.json +18 -18
- package/src/index.ts +297 -125
- package/src/ranges.ts +97 -65
- package/src/sync/rateless-iblt.ts +37 -18
package/dist/src/index.js
CHANGED
|
@@ -169,10 +169,17 @@ const createIndexableDomainFromResolution = (resolution) => {
|
|
|
169
169
|
export const DEFAULT_MIN_REPLICAS = 2;
|
|
170
170
|
export const WAIT_FOR_REPLICATOR_TIMEOUT = 9000;
|
|
171
171
|
export const WAIT_FOR_ROLE_MATURITY = 5000;
|
|
172
|
-
|
|
172
|
+
// TODO(prune): Investigate if/when a non-zero prune delay is required for correctness
|
|
173
|
+
// (e.g. responsibility/replication-info message reordering in multi-peer scenarios).
|
|
174
|
+
// Prefer making pruning robust without timing-based heuristics.
|
|
175
|
+
export const WAIT_FOR_PRUNE_DELAY = 0;
|
|
173
176
|
const PRUNE_DEBOUNCE_INTERVAL = 500;
|
|
174
177
|
// DON'T SET THIS ANY LOWER: it makes the PID controller unstable, because the system does not respond fast enough to updates from the PID controller
|
|
175
178
|
const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
|
|
179
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
|
|
180
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
|
|
181
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
|
|
182
|
+
const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
|
|
176
183
|
const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
|
|
177
184
|
const getIdForDynamicRange = (publicKey) => {
|
|
178
185
|
return sha256Sync(concat([publicKey.bytes, new TextEncoder().encode("dynamic")]));
|
|
@@ -217,6 +224,7 @@ let SharedLog = (() => {
|
|
|
217
224
|
coordinateToHash;
|
|
218
225
|
recentlyRebalanced;
|
|
219
226
|
uniqueReplicators;
|
|
227
|
+
_replicatorsReconciled;
|
|
220
228
|
/* private _totalParticipation!: number; */
|
|
221
229
|
// gid -> coordinate -> publicKeyHash list (of owners)
|
|
222
230
|
_gidPeersHistory;
|
|
@@ -296,14 +304,6 @@ let SharedLog = (() => {
|
|
|
296
304
|
}
|
|
297
305
|
setupRebalanceDebounceFunction(interval = RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL) {
|
|
298
306
|
this.rebalanceParticipationDebounced = undefined;
|
|
299
|
-
// make the rebalancing to respect warmup time
|
|
300
|
-
let intervalTime = interval * 2;
|
|
301
|
-
let timeout = setTimeout(() => {
|
|
302
|
-
intervalTime = interval;
|
|
303
|
-
}, this.timeUntilRoleMaturity);
|
|
304
|
-
this._closeController.signal.addEventListener("abort", () => {
|
|
305
|
-
clearTimeout(timeout);
|
|
306
|
-
});
|
|
307
307
|
this.rebalanceParticipationDebounced = debounceFixedInterval(() => this.rebalanceParticipation(),
|
|
308
308
|
/* Math.max(
|
|
309
309
|
REBALANCE_DEBOUNCE_INTERVAL,
|
|
@@ -312,7 +312,7 @@ let SharedLog = (() => {
|
|
|
312
312
|
REBALANCE_DEBOUNCE_INTERVAL
|
|
313
313
|
)
|
|
314
314
|
) */
|
|
315
|
-
|
|
315
|
+
interval);
|
|
316
316
|
}
|
|
317
317
|
async _replicate(options, { reset, checkDuplicates, announce, mergeSegments, rebalance, } = {}) {
|
|
318
318
|
let offsetWasProvided = false;
|
|
@@ -690,15 +690,29 @@ let SharedLog = (() => {
|
|
|
690
690
|
})
|
|
691
691
|
.all()).map((x) => x.value);
|
|
692
692
|
let prevCount = deleted.length;
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
693
|
+
const existingById = new Map(deleted.map((x) => [x.idString, x]));
|
|
694
|
+
const hasSameRanges = deleted.length === ranges.length &&
|
|
695
|
+
ranges.every((range) => {
|
|
696
|
+
const existing = existingById.get(range.idString);
|
|
697
|
+
return existing != null && existing.equalRange(range);
|
|
698
|
+
});
|
|
699
|
+
// Avoid churn on repeated full-state announcements that don't change any
|
|
700
|
+
// replication ranges. This prevents unnecessary `replication:change`
|
|
701
|
+
// events and rebalancing cascades.
|
|
702
|
+
if (hasSameRanges) {
|
|
703
|
+
diffs = [];
|
|
704
|
+
}
|
|
705
|
+
else {
|
|
706
|
+
await this.replicationIndex.del({ query: { hash: from.hashcode() } });
|
|
707
|
+
diffs = [
|
|
708
|
+
...deleted.map((x) => {
|
|
709
|
+
return { range: x, type: "removed", timestamp };
|
|
710
|
+
}),
|
|
711
|
+
...ranges.map((x) => {
|
|
712
|
+
return { range: x, type: "added", timestamp };
|
|
713
|
+
}),
|
|
714
|
+
];
|
|
715
|
+
}
|
|
702
716
|
isNewReplicator = prevCount === 0 && ranges.length > 0;
|
|
703
717
|
}
|
|
704
718
|
else {
|
|
@@ -716,16 +730,18 @@ let SharedLog = (() => {
|
|
|
716
730
|
existing.push(result.value);
|
|
717
731
|
}
|
|
718
732
|
}
|
|
733
|
+
let prevCountForOwner = undefined;
|
|
719
734
|
if (existing.length === 0) {
|
|
720
|
-
|
|
735
|
+
prevCountForOwner = await this.replicationIndex.count({
|
|
721
736
|
query: new StringMatch({ key: "hash", value: from.hashcode() }),
|
|
722
737
|
});
|
|
723
|
-
isNewReplicator =
|
|
738
|
+
isNewReplicator = prevCountForOwner === 0;
|
|
724
739
|
}
|
|
725
740
|
else {
|
|
726
741
|
isNewReplicator = false;
|
|
727
742
|
}
|
|
728
|
-
if (checkDuplicates
|
|
743
|
+
if (checkDuplicates &&
|
|
744
|
+
(existing.length > 0 || (prevCountForOwner ?? 0) > 0)) {
|
|
729
745
|
let deduplicated = [];
|
|
730
746
|
// TODO also deduplicate/de-overlap among the ranges that ought to be inserted?
|
|
731
747
|
for (const range of ranges) {
|
|
@@ -1252,6 +1268,7 @@ let SharedLog = (() => {
|
|
|
1252
1268
|
this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
|
|
1253
1269
|
this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
|
|
1254
1270
|
this.uniqueReplicators = new Set();
|
|
1271
|
+
this._replicatorsReconciled = false;
|
|
1255
1272
|
this.openTime = +new Date();
|
|
1256
1273
|
this.oldestOpenTime = this.openTime;
|
|
1257
1274
|
this.distributionDebounceTime =
|
|
@@ -1259,8 +1276,8 @@ let SharedLog = (() => {
|
|
|
1259
1276
|
this.timeUntilRoleMaturity =
|
|
1260
1277
|
options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
|
|
1261
1278
|
this.waitForReplicatorTimeout =
|
|
1262
|
-
options?.waitForReplicatorTimeout
|
|
1263
|
-
this.waitForPruneDelay = options?.waitForPruneDelay
|
|
1279
|
+
options?.waitForReplicatorTimeout ?? WAIT_FOR_REPLICATOR_TIMEOUT;
|
|
1280
|
+
this.waitForPruneDelay = options?.waitForPruneDelay ?? WAIT_FOR_PRUNE_DELAY;
|
|
1264
1281
|
if (this.waitForReplicatorTimeout < this.timeUntilRoleMaturity) {
|
|
1265
1282
|
this.waitForReplicatorTimeout = this.timeUntilRoleMaturity; // does not makes sense to expect a replicator to mature faster than it is reachable
|
|
1266
1283
|
}
|
|
@@ -1473,7 +1490,16 @@ let SharedLog = (() => {
|
|
|
1473
1490
|
async afterOpen() {
|
|
1474
1491
|
await super.afterOpen();
|
|
1475
1492
|
// We do this here, because these calls requires this.closed == false
|
|
1476
|
-
this.pruneOfflineReplicators()
|
|
1493
|
+
void this.pruneOfflineReplicators()
|
|
1494
|
+
.then(() => {
|
|
1495
|
+
this._replicatorsReconciled = true;
|
|
1496
|
+
})
|
|
1497
|
+
.catch((error) => {
|
|
1498
|
+
if (isNotStartedError(error)) {
|
|
1499
|
+
return;
|
|
1500
|
+
}
|
|
1501
|
+
logger.error(error);
|
|
1502
|
+
});
|
|
1477
1503
|
await this.rebalanceParticipation();
|
|
1478
1504
|
// Take into account existing subscription
|
|
1479
1505
|
(await this.node.services.pubsub.getSubscribers(this.topic))?.forEach((v, k) => {
|
|
@@ -1631,13 +1657,29 @@ let SharedLog = (() => {
|
|
|
1631
1657
|
set.add(key);
|
|
1632
1658
|
}
|
|
1633
1659
|
if (options?.reachableOnly) {
|
|
1634
|
-
|
|
1660
|
+
// Prefer the live pubsub subscriber set when filtering reachability.
|
|
1661
|
+
// `uniqueReplicators` is primarily driven by replication messages and can lag during
|
|
1662
|
+
// joins/restarts; using subscribers prevents excluding peers that are reachable but
|
|
1663
|
+
// whose replication ranges were loaded from disk or haven't been processed yet.
|
|
1664
|
+
const subscribers = (await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
1665
|
+
undefined;
|
|
1666
|
+
const subscriberHashcodes = subscribers
|
|
1667
|
+
? new Set(subscribers.map((key) => key.hashcode()))
|
|
1668
|
+
: undefined;
|
|
1669
|
+
const reachable = [];
|
|
1670
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
1635
1671
|
for (const peer of set) {
|
|
1636
|
-
if (
|
|
1637
|
-
|
|
1672
|
+
if (peer === selfHash) {
|
|
1673
|
+
reachable.push(peer);
|
|
1674
|
+
continue;
|
|
1675
|
+
}
|
|
1676
|
+
if (subscriberHashcodes
|
|
1677
|
+
? subscriberHashcodes.has(peer)
|
|
1678
|
+
: this.uniqueReplicators.has(peer)) {
|
|
1679
|
+
reachable.push(peer);
|
|
1638
1680
|
}
|
|
1639
1681
|
}
|
|
1640
|
-
return
|
|
1682
|
+
return reachable;
|
|
1641
1683
|
}
|
|
1642
1684
|
return [...set];
|
|
1643
1685
|
}
|
|
@@ -1977,15 +2019,15 @@ let SharedLog = (() => {
|
|
|
1977
2019
|
await this.remoteBlocks.onMessage(msg.message, context.from.hashcode());
|
|
1978
2020
|
}
|
|
1979
2021
|
else if (msg instanceof RequestReplicationInfoMessage) {
|
|
1980
|
-
// TODO this message type is never used, should we remove it?
|
|
1981
2022
|
if (context.from.equals(this.node.identity.publicKey)) {
|
|
1982
2023
|
return;
|
|
1983
2024
|
}
|
|
1984
|
-
await this.
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
mode: new
|
|
1988
|
-
})
|
|
2025
|
+
const segments = (await this.getMyReplicationSegments()).map((x) => x.toReplicationRange());
|
|
2026
|
+
this.rpc
|
|
2027
|
+
.send(new AllReplicatingSegmentsMessage({ segments }), {
|
|
2028
|
+
mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
|
|
2029
|
+
})
|
|
2030
|
+
.catch((e) => logger.error(e.toString()));
|
|
1989
2031
|
// for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
|
|
1990
2032
|
if (this.v8Behaviour) {
|
|
1991
2033
|
const role = this.getRole();
|
|
@@ -2009,38 +2051,32 @@ let SharedLog = (() => {
|
|
|
2009
2051
|
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2010
2052
|
return;
|
|
2011
2053
|
}
|
|
2012
|
-
|
|
2013
|
-
//
|
|
2014
|
-
//
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
.
|
|
2020
|
-
|
|
2021
|
-
// and do them in the right order
|
|
2022
|
-
const prev = this.latestReplicationInfoMessage.get(context.from.hashcode());
|
|
2023
|
-
if (prev && prev > context.message.header.timestamp) {
|
|
2054
|
+
const replicationInfoMessage = msg;
|
|
2055
|
+
// Process replication updates even if the sender isn't yet considered "ready" by
|
|
2056
|
+
// `Program.waitFor()`. Dropping these messages can lead to missing replicator info
|
|
2057
|
+
// (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
|
|
2058
|
+
const from = context.from;
|
|
2059
|
+
const messageTimestamp = context.message.header.timestamp;
|
|
2060
|
+
(async () => {
|
|
2061
|
+
const prev = this.latestReplicationInfoMessage.get(from.hashcode());
|
|
2062
|
+
if (prev && prev > messageTimestamp) {
|
|
2024
2063
|
return;
|
|
2025
2064
|
}
|
|
2026
|
-
this.latestReplicationInfoMessage.set(
|
|
2027
|
-
let reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2065
|
+
this.latestReplicationInfoMessage.set(from.hashcode(), messageTimestamp);
|
|
2028
2066
|
if (this.closed) {
|
|
2029
2067
|
return;
|
|
2030
2068
|
}
|
|
2031
|
-
|
|
2069
|
+
const reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2070
|
+
await this.addReplicationRange(replicationInfoMessage.segments.map((x) => x.toReplicationRangeIndexable(from)), from, {
|
|
2032
2071
|
reset,
|
|
2033
2072
|
checkDuplicates: true,
|
|
2034
|
-
timestamp: Number(
|
|
2073
|
+
timestamp: Number(messageTimestamp),
|
|
2035
2074
|
});
|
|
2036
|
-
|
|
2037
|
-
})
|
|
2038
|
-
.catch((e) => {
|
|
2075
|
+
})().catch((e) => {
|
|
2039
2076
|
if (isNotStartedError(e)) {
|
|
2040
2077
|
return;
|
|
2041
2078
|
}
|
|
2042
|
-
logger.error(
|
|
2043
|
-
e?.message);
|
|
2079
|
+
logger.error(`Failed to apply replication settings from '${from.hashcode()}': ${e?.message ?? e}`);
|
|
2044
2080
|
});
|
|
2045
2081
|
}
|
|
2046
2082
|
else if (msg instanceof StoppedReplicating) {
|
|
@@ -2181,15 +2217,17 @@ let SharedLog = (() => {
|
|
|
2181
2217
|
}
|
|
2182
2218
|
async join(entries, options) {
|
|
2183
2219
|
let entriesToReplicate = [];
|
|
2184
|
-
if (options?.replicate) {
|
|
2220
|
+
if (options?.replicate && this.log.length > 0) {
|
|
2185
2221
|
// TODO this block should perhaps be called from a callback on the this.log.join method for all the elements that were ignored because they were already joined, like "onAlreadyJoined"
|
|
2186
2222
|
// check which entries we already have but are not replicating, and replicate them
|
|
2187
2223
|
// we can not just do the 'join' call because it will ignore the already joined entries
|
|
2188
2224
|
for (const element of entries) {
|
|
2189
2225
|
if (typeof element === "string") {
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2226
|
+
if (await this.log.has(element)) {
|
|
2227
|
+
const entry = await this.log.get(element);
|
|
2228
|
+
if (entry) {
|
|
2229
|
+
entriesToReplicate.push(entry);
|
|
2230
|
+
}
|
|
2193
2231
|
}
|
|
2194
2232
|
}
|
|
2195
2233
|
else if (element instanceof Entry) {
|
|
@@ -2198,9 +2236,11 @@ let SharedLog = (() => {
|
|
|
2198
2236
|
}
|
|
2199
2237
|
}
|
|
2200
2238
|
else {
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2239
|
+
if (await this.log.has(element.hash)) {
|
|
2240
|
+
const entry = await this.log.get(element.hash);
|
|
2241
|
+
if (entry) {
|
|
2242
|
+
entriesToReplicate.push(entry);
|
|
2243
|
+
}
|
|
2204
2244
|
}
|
|
2205
2245
|
}
|
|
2206
2246
|
}
|
|
@@ -2293,6 +2333,7 @@ let SharedLog = (() => {
|
|
|
2293
2333
|
: (options?.roleAge ?? (await this.getDefaultMinRoleAge()));
|
|
2294
2334
|
let settled = false;
|
|
2295
2335
|
let timer;
|
|
2336
|
+
let requestTimer;
|
|
2296
2337
|
const clear = () => {
|
|
2297
2338
|
this.events.removeEventListener("replicator:mature", check);
|
|
2298
2339
|
this.events.removeEventListener("replication:change", check);
|
|
@@ -2301,6 +2342,10 @@ let SharedLog = (() => {
|
|
|
2301
2342
|
clearTimeout(timer);
|
|
2302
2343
|
timer = undefined;
|
|
2303
2344
|
}
|
|
2345
|
+
if (requestTimer != null) {
|
|
2346
|
+
clearTimeout(requestTimer);
|
|
2347
|
+
requestTimer = undefined;
|
|
2348
|
+
}
|
|
2304
2349
|
};
|
|
2305
2350
|
const resolve = () => {
|
|
2306
2351
|
if (settled) {
|
|
@@ -2325,6 +2370,31 @@ let SharedLog = (() => {
|
|
|
2325
2370
|
timer = setTimeout(() => {
|
|
2326
2371
|
reject(new TimeoutError(`Timeout waiting for replicator ${key.hashcode()}`));
|
|
2327
2372
|
}, timeoutMs);
|
|
2373
|
+
let requestAttempts = 0;
|
|
2374
|
+
const requestIntervalMs = 1000;
|
|
2375
|
+
const maxRequestAttempts = Math.max(3, Math.ceil(timeoutMs / requestIntervalMs));
|
|
2376
|
+
const requestReplicationInfo = () => {
|
|
2377
|
+
if (settled || this.closed) {
|
|
2378
|
+
return;
|
|
2379
|
+
}
|
|
2380
|
+
if (requestAttempts >= maxRequestAttempts) {
|
|
2381
|
+
return;
|
|
2382
|
+
}
|
|
2383
|
+
requestAttempts++;
|
|
2384
|
+
this.rpc
|
|
2385
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
2386
|
+
mode: new SeekDelivery({ redundancy: 1, to: [key] }),
|
|
2387
|
+
})
|
|
2388
|
+
.catch((e) => {
|
|
2389
|
+
// Best-effort: missing peers / unopened RPC should not fail the wait logic.
|
|
2390
|
+
if (isNotStartedError(e)) {
|
|
2391
|
+
return;
|
|
2392
|
+
}
|
|
2393
|
+
});
|
|
2394
|
+
if (requestAttempts < maxRequestAttempts) {
|
|
2395
|
+
requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
|
|
2396
|
+
}
|
|
2397
|
+
};
|
|
2328
2398
|
const check = async () => {
|
|
2329
2399
|
const iterator = this.replicationIndex?.iterate({ query: new StringMatch({ key: "hash", value: key.hashcode() }) }, { reference: true });
|
|
2330
2400
|
try {
|
|
@@ -2347,6 +2417,7 @@ let SharedLog = (() => {
|
|
|
2347
2417
|
await iterator?.close();
|
|
2348
2418
|
}
|
|
2349
2419
|
};
|
|
2420
|
+
requestReplicationInfo();
|
|
2350
2421
|
check();
|
|
2351
2422
|
this.events.addEventListener("replicator:mature", check);
|
|
2352
2423
|
this.events.addEventListener("replication:change", check);
|
|
@@ -2504,15 +2575,49 @@ let SharedLog = (() => {
|
|
|
2504
2575
|
if (this._isReplicating === false) {
|
|
2505
2576
|
return 0;
|
|
2506
2577
|
}
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2578
|
+
// Explicitly disable maturity gating (used by many tests).
|
|
2579
|
+
if (this.timeUntilRoleMaturity <= 0) {
|
|
2580
|
+
return 0;
|
|
2581
|
+
}
|
|
2582
|
+
// If we're alone (or pubsub isn't ready), a fixed maturity time is sufficient.
|
|
2583
|
+
// When there are multiple replicators we want a stable threshold that doesn't
|
|
2584
|
+
// depend on "now" (otherwise it can drift and turn into a flake).
|
|
2585
|
+
let subscribers = 1;
|
|
2586
|
+
if (!this.rpc.closed) {
|
|
2587
|
+
try {
|
|
2588
|
+
subscribers =
|
|
2589
|
+
(await this.node.services.pubsub.getSubscribers(this.rpc.topic))
|
|
2590
|
+
?.length ?? 1;
|
|
2591
|
+
}
|
|
2592
|
+
catch {
|
|
2593
|
+
// Best-effort only; fall back to 1.
|
|
2594
|
+
}
|
|
2595
|
+
}
|
|
2596
|
+
if (subscribers <= 1) {
|
|
2597
|
+
return this.timeUntilRoleMaturity;
|
|
2598
|
+
}
|
|
2599
|
+
// Use replication range timestamps to compute a stable "age gap" between the
|
|
2600
|
+
// newest and oldest known roles. This keeps the oldest role mature while
|
|
2601
|
+
// preventing newer roles from being treated as mature purely because time
|
|
2602
|
+
// passes between test steps / network events.
|
|
2603
|
+
let newestOpenTime = this.openTime;
|
|
2604
|
+
try {
|
|
2605
|
+
const newestIterator = await this.replicationIndex.iterate({
|
|
2606
|
+
sort: [new Sort({ key: "timestamp", direction: "desc" })],
|
|
2607
|
+
}, { shape: { timestamp: true }, reference: true });
|
|
2608
|
+
const newestTimestampFromDB = (await newestIterator.next(1))[0]?.value
|
|
2609
|
+
.timestamp;
|
|
2610
|
+
await newestIterator.close();
|
|
2611
|
+
if (newestTimestampFromDB != null) {
|
|
2612
|
+
newestOpenTime = Number(newestTimestampFromDB);
|
|
2613
|
+
}
|
|
2614
|
+
}
|
|
2615
|
+
catch {
|
|
2616
|
+
// Best-effort only; fall back to local open time.
|
|
2617
|
+
}
|
|
2618
|
+
const ageGapToOldest = newestOpenTime - this.oldestOpenTime;
|
|
2619
|
+
const roleAge = Math.max(this.timeUntilRoleMaturity, ageGapToOldest);
|
|
2620
|
+
return roleAge < 0 ? 0 : roleAge;
|
|
2516
2621
|
}
|
|
2517
2622
|
async findLeaders(cursors, entry, options) {
|
|
2518
2623
|
// we consume a list of coordinates in this method since if we are leader of one coordinate we want to persist all of them
|
|
@@ -2548,8 +2653,32 @@ let SharedLog = (() => {
|
|
|
2548
2653
|
}
|
|
2549
2654
|
async _findLeaders(cursors, options) {
|
|
2550
2655
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
|
|
2656
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2657
|
+
// Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
|
|
2658
|
+
// persisted replication index. Until then, fall back to live pubsub subscribers
|
|
2659
|
+
// and avoid relying on `uniqueReplicators` being complete.
|
|
2660
|
+
let peerFilter = undefined;
|
|
2661
|
+
if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
|
|
2662
|
+
peerFilter = this.uniqueReplicators.has(selfHash)
|
|
2663
|
+
? this.uniqueReplicators
|
|
2664
|
+
: new Set([...this.uniqueReplicators, selfHash]);
|
|
2665
|
+
}
|
|
2666
|
+
else {
|
|
2667
|
+
try {
|
|
2668
|
+
const subscribers = (await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
2669
|
+
undefined;
|
|
2670
|
+
if (subscribers && subscribers.length > 0) {
|
|
2671
|
+
peerFilter = new Set(subscribers.map((key) => key.hashcode()));
|
|
2672
|
+
peerFilter.add(selfHash);
|
|
2673
|
+
}
|
|
2674
|
+
}
|
|
2675
|
+
catch {
|
|
2676
|
+
// Best-effort only; if pubsub isn't ready, do a full scan.
|
|
2677
|
+
}
|
|
2678
|
+
}
|
|
2551
2679
|
return getSamples(cursors, this.replicationIndex, roleAge, this.indexableDomain.numbers, {
|
|
2552
|
-
|
|
2680
|
+
peerFilter,
|
|
2681
|
+
uniqueReplicators: peerFilter,
|
|
2553
2682
|
});
|
|
2554
2683
|
}
|
|
2555
2684
|
async findLeadersFromEntry(entry, replicas, options) {
|
|
@@ -2578,7 +2707,7 @@ let SharedLog = (() => {
|
|
|
2578
2707
|
for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
|
|
2579
2708
|
v.delete(publicKey.hashcode());
|
|
2580
2709
|
if (v.size === 0) {
|
|
2581
|
-
this.
|
|
2710
|
+
this._requestIPruneResponseReplicatorSet.delete(k);
|
|
2582
2711
|
}
|
|
2583
2712
|
}
|
|
2584
2713
|
this.syncronizer.onPeerDisconnected(publicKey);
|
|
@@ -2608,6 +2737,14 @@ let SharedLog = (() => {
|
|
|
2608
2737
|
.catch((e) => logger.error(e.toString()));
|
|
2609
2738
|
}
|
|
2610
2739
|
}
|
|
2740
|
+
// Request the remote peer's replication info. This makes joins resilient to
|
|
2741
|
+
// timing-sensitive delivery/order issues where we may miss their initial
|
|
2742
|
+
// replication announcement.
|
|
2743
|
+
this.rpc
|
|
2744
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
2745
|
+
mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
|
|
2746
|
+
})
|
|
2747
|
+
.catch((e) => logger.error(e.toString()));
|
|
2611
2748
|
}
|
|
2612
2749
|
else {
|
|
2613
2750
|
await this.removeReplicator(publicKey);
|
|
@@ -2728,9 +2865,19 @@ let SharedLog = (() => {
|
|
|
2728
2865
|
deferredPromise.reject(e);
|
|
2729
2866
|
};
|
|
2730
2867
|
let cursor = undefined;
|
|
2731
|
-
|
|
2732
|
-
|
|
2733
|
-
|
|
2868
|
+
// Checked prune requests can legitimately take longer than a fixed 10s:
|
|
2869
|
+
// - The remote may not have the entry yet and will wait up to `_respondToIHaveTimeout`
|
|
2870
|
+
// - Leadership/replicator information may take up to `waitForReplicatorTimeout` to settle
|
|
2871
|
+
// If we time out too early we can end up with permanently prunable heads that never
|
|
2872
|
+
// get retried (a common CI flake in "prune before join" tests).
|
|
2873
|
+
const checkedPruneTimeoutMs = options?.timeout ??
|
|
2874
|
+
Math.max(10_000, Number(this._respondToIHaveTimeout ?? 0) +
|
|
2875
|
+
this.waitForReplicatorTimeout +
|
|
2876
|
+
PRUNE_DEBOUNCE_INTERVAL * 2);
|
|
2877
|
+
const timeout = setTimeout(() => {
|
|
2878
|
+
reject(new Error(`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`));
|
|
2879
|
+
}, checkedPruneTimeoutMs);
|
|
2880
|
+
timeout.unref?.();
|
|
2734
2881
|
this._pendingDeletes.set(entry.hash, {
|
|
2735
2882
|
promise: deferredPromise,
|
|
2736
2883
|
clear,
|
|
@@ -2838,8 +2985,8 @@ let SharedLog = (() => {
|
|
|
2838
2985
|
return { range: x, type: "added", timestamp };
|
|
2839
2986
|
}));
|
|
2840
2987
|
}
|
|
2841
|
-
async waitForPruned() {
|
|
2842
|
-
await waitFor(() => this._pendingDeletes.size === 0);
|
|
2988
|
+
async waitForPruned(options) {
|
|
2989
|
+
await waitFor(() => this._pendingDeletes.size === 0, options);
|
|
2843
2990
|
}
|
|
2844
2991
|
async onReplicationChange(changeOrChanges) {
|
|
2845
2992
|
/**
|
|
@@ -2956,9 +3103,19 @@ let SharedLog = (() => {
|
|
|
2956
3103
|
peerCount: peersSize,
|
|
2957
3104
|
cpuUsage: this.cpuUsage?.value(),
|
|
2958
3105
|
});
|
|
2959
|
-
const
|
|
2960
|
-
|
|
2961
|
-
|
|
3106
|
+
const absoluteDifference = Math.abs(dynamicRange.widthNormalized - newFactor);
|
|
3107
|
+
const relativeDifference = absoluteDifference /
|
|
3108
|
+
Math.max(dynamicRange.widthNormalized, RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR);
|
|
3109
|
+
let minRelativeChange = RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE;
|
|
3110
|
+
if (this.replicationController.maxMemoryLimit != null) {
|
|
3111
|
+
minRelativeChange =
|
|
3112
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT;
|
|
3113
|
+
}
|
|
3114
|
+
else if (this.replicationController.maxCPUUsage != null) {
|
|
3115
|
+
minRelativeChange =
|
|
3116
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT;
|
|
3117
|
+
}
|
|
3118
|
+
if (relativeDifference > minRelativeChange) {
|
|
2962
3119
|
// TODO can not reuse old range, since it will (potentially) affect the index because of sideeffects
|
|
2963
3120
|
dynamicRange = new this.indexableDomain.constructorRange({
|
|
2964
3121
|
offset: dynamicRange.start1,
|