@peerbit/shared-log 12.2.0-62829ef → 12.2.0-6aaa5dd
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/pid-convergence.d.ts +2 -0
- package/dist/benchmark/pid-convergence.d.ts.map +1 -0
- package/dist/benchmark/pid-convergence.js +138 -0
- package/dist/benchmark/pid-convergence.js.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts +2 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js +104 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js.map +1 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.js +17 -1
- package/dist/benchmark/rateless-iblt-startsync-cache.js.map +1 -1
- package/dist/benchmark/sync-catchup.d.ts +3 -0
- package/dist/benchmark/sync-catchup.d.ts.map +1 -0
- package/dist/benchmark/sync-catchup.js +109 -0
- package/dist/benchmark/sync-catchup.js.map +1 -0
- package/dist/src/index.d.ts +8 -2
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +239 -82
- package/dist/src/index.js.map +1 -1
- package/dist/src/ranges.d.ts +1 -0
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +48 -18
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +41 -18
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/package.json +18 -18
- package/src/index.ts +297 -125
- package/src/ranges.ts +97 -65
- package/src/sync/rateless-iblt.ts +37 -18
package/src/index.ts
CHANGED
|
@@ -376,11 +376,18 @@ export type SharedLogOptions<
|
|
|
376
376
|
export const DEFAULT_MIN_REPLICAS = 2;
|
|
377
377
|
export const WAIT_FOR_REPLICATOR_TIMEOUT = 9000;
|
|
378
378
|
export const WAIT_FOR_ROLE_MATURITY = 5000;
|
|
379
|
-
|
|
379
|
+
// TODO(prune): Investigate if/when a non-zero prune delay is required for correctness
|
|
380
|
+
// (e.g. responsibility/replication-info message reordering in multi-peer scenarios).
|
|
381
|
+
// Prefer making pruning robust without timing-based heuristics.
|
|
382
|
+
export const WAIT_FOR_PRUNE_DELAY = 0;
|
|
380
383
|
const PRUNE_DEBOUNCE_INTERVAL = 500;
|
|
381
384
|
|
|
382
385
|
// DON'T SET THIS ANY LOWER: it will make the PID controller unstable, because the system does not respond fast enough to updates from the PID controller
|
|
383
386
|
const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
|
|
387
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
|
|
388
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
|
|
389
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
|
|
390
|
+
const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
|
|
384
391
|
|
|
385
392
|
const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
|
|
386
393
|
|
|
@@ -456,6 +463,7 @@ export class SharedLog<
|
|
|
456
463
|
private recentlyRebalanced!: Cache<string>;
|
|
457
464
|
|
|
458
465
|
uniqueReplicators!: Set<string>;
|
|
466
|
+
private _replicatorsReconciled!: boolean;
|
|
459
467
|
|
|
460
468
|
/* private _totalParticipation!: number; */
|
|
461
469
|
|
|
@@ -617,15 +625,6 @@ export class SharedLog<
|
|
|
617
625
|
) {
|
|
618
626
|
this.rebalanceParticipationDebounced = undefined;
|
|
619
627
|
|
|
620
|
-
// make the rebalancing to respect warmup time
|
|
621
|
-
let intervalTime = interval * 2;
|
|
622
|
-
let timeout = setTimeout(() => {
|
|
623
|
-
intervalTime = interval;
|
|
624
|
-
}, this.timeUntilRoleMaturity);
|
|
625
|
-
this._closeController.signal.addEventListener("abort", () => {
|
|
626
|
-
clearTimeout(timeout);
|
|
627
|
-
});
|
|
628
|
-
|
|
629
628
|
this.rebalanceParticipationDebounced = debounceFixedInterval(
|
|
630
629
|
() => this.rebalanceParticipation(),
|
|
631
630
|
/* Math.max(
|
|
@@ -635,7 +634,7 @@ export class SharedLog<
|
|
|
635
634
|
REBALANCE_DEBOUNCE_INTERVAL
|
|
636
635
|
)
|
|
637
636
|
) */
|
|
638
|
-
|
|
637
|
+
interval, // TODO make this dynamic on the number of replicators
|
|
639
638
|
);
|
|
640
639
|
}
|
|
641
640
|
|
|
@@ -1166,16 +1165,31 @@ export class SharedLog<
|
|
|
1166
1165
|
|
|
1167
1166
|
let prevCount = deleted.length;
|
|
1168
1167
|
|
|
1169
|
-
|
|
1168
|
+
const existingById = new Map(deleted.map((x) => [x.idString, x]));
|
|
1169
|
+
const hasSameRanges =
|
|
1170
|
+
deleted.length === ranges.length &&
|
|
1171
|
+
ranges.every((range) => {
|
|
1172
|
+
const existing = existingById.get(range.idString);
|
|
1173
|
+
return existing != null && existing.equalRange(range);
|
|
1174
|
+
});
|
|
1170
1175
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
})
|
|
1178
|
-
|
|
1176
|
+
// Avoid churn on repeated full-state announcements that don't change any
|
|
1177
|
+
// replication ranges. This prevents unnecessary `replication:change`
|
|
1178
|
+
// events and rebalancing cascades.
|
|
1179
|
+
if (hasSameRanges) {
|
|
1180
|
+
diffs = [];
|
|
1181
|
+
} else {
|
|
1182
|
+
await this.replicationIndex.del({ query: { hash: from.hashcode() } });
|
|
1183
|
+
|
|
1184
|
+
diffs = [
|
|
1185
|
+
...deleted.map((x) => {
|
|
1186
|
+
return { range: x, type: "removed" as const, timestamp };
|
|
1187
|
+
}),
|
|
1188
|
+
...ranges.map((x) => {
|
|
1189
|
+
return { range: x, type: "added" as const, timestamp };
|
|
1190
|
+
}),
|
|
1191
|
+
];
|
|
1192
|
+
}
|
|
1179
1193
|
|
|
1180
1194
|
isNewReplicator = prevCount === 0 && ranges.length > 0;
|
|
1181
1195
|
} else {
|
|
@@ -1198,16 +1212,20 @@ export class SharedLog<
|
|
|
1198
1212
|
}
|
|
1199
1213
|
}
|
|
1200
1214
|
|
|
1215
|
+
let prevCountForOwner: number | undefined = undefined;
|
|
1201
1216
|
if (existing.length === 0) {
|
|
1202
|
-
|
|
1217
|
+
prevCountForOwner = await this.replicationIndex.count({
|
|
1203
1218
|
query: new StringMatch({ key: "hash", value: from.hashcode() }),
|
|
1204
1219
|
});
|
|
1205
|
-
isNewReplicator =
|
|
1220
|
+
isNewReplicator = prevCountForOwner === 0;
|
|
1206
1221
|
} else {
|
|
1207
1222
|
isNewReplicator = false;
|
|
1208
1223
|
}
|
|
1209
1224
|
|
|
1210
|
-
if (
|
|
1225
|
+
if (
|
|
1226
|
+
checkDuplicates &&
|
|
1227
|
+
(existing.length > 0 || (prevCountForOwner ?? 0) > 0)
|
|
1228
|
+
) {
|
|
1211
1229
|
let deduplicated: ReplicationRangeIndexable<any>[] = [];
|
|
1212
1230
|
|
|
1213
1231
|
// TODO also deduplicate/de-overlap among the ranges that ought to be inserted?
|
|
@@ -1874,6 +1892,7 @@ export class SharedLog<
|
|
|
1874
1892
|
this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
|
|
1875
1893
|
|
|
1876
1894
|
this.uniqueReplicators = new Set();
|
|
1895
|
+
this._replicatorsReconciled = false;
|
|
1877
1896
|
|
|
1878
1897
|
this.openTime = +new Date();
|
|
1879
1898
|
this.oldestOpenTime = this.openTime;
|
|
@@ -1883,8 +1902,8 @@ export class SharedLog<
|
|
|
1883
1902
|
this.timeUntilRoleMaturity =
|
|
1884
1903
|
options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
|
|
1885
1904
|
this.waitForReplicatorTimeout =
|
|
1886
|
-
options?.waitForReplicatorTimeout
|
|
1887
|
-
this.waitForPruneDelay = options?.waitForPruneDelay
|
|
1905
|
+
options?.waitForReplicatorTimeout ?? WAIT_FOR_REPLICATOR_TIMEOUT;
|
|
1906
|
+
this.waitForPruneDelay = options?.waitForPruneDelay ?? WAIT_FOR_PRUNE_DELAY;
|
|
1888
1907
|
|
|
1889
1908
|
if (this.waitForReplicatorTimeout < this.timeUntilRoleMaturity) {
|
|
1890
1909
|
this.waitForReplicatorTimeout = this.timeUntilRoleMaturity; // it does not make sense to expect a replicator to mature faster than it is reachable
|
|
@@ -2176,7 +2195,16 @@ export class SharedLog<
|
|
|
2176
2195
|
await super.afterOpen();
|
|
2177
2196
|
|
|
2178
2197
|
// We do this here because these calls require this.closed == false
|
|
2179
|
-
this.pruneOfflineReplicators()
|
|
2198
|
+
void this.pruneOfflineReplicators()
|
|
2199
|
+
.then(() => {
|
|
2200
|
+
this._replicatorsReconciled = true;
|
|
2201
|
+
})
|
|
2202
|
+
.catch((error) => {
|
|
2203
|
+
if (isNotStartedError(error as Error)) {
|
|
2204
|
+
return;
|
|
2205
|
+
}
|
|
2206
|
+
logger.error(error);
|
|
2207
|
+
});
|
|
2180
2208
|
|
|
2181
2209
|
await this.rebalanceParticipation();
|
|
2182
2210
|
|
|
@@ -2392,15 +2420,35 @@ export class SharedLog<
|
|
|
2392
2420
|
set.add(key);
|
|
2393
2421
|
}
|
|
2394
2422
|
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2423
|
+
if (options?.reachableOnly) {
|
|
2424
|
+
// Prefer the live pubsub subscriber set when filtering reachability.
|
|
2425
|
+
// `uniqueReplicators` is primarily driven by replication messages and can lag during
|
|
2426
|
+
// joins/restarts; using subscribers prevents excluding peers that are reachable but
|
|
2427
|
+
// whose replication ranges were loaded from disk or haven't been processed yet.
|
|
2428
|
+
const subscribers =
|
|
2429
|
+
(await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
2430
|
+
undefined;
|
|
2431
|
+
const subscriberHashcodes = subscribers
|
|
2432
|
+
? new Set(subscribers.map((key) => key.hashcode()))
|
|
2433
|
+
: undefined;
|
|
2434
|
+
|
|
2435
|
+
const reachable: string[] = [];
|
|
2436
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2437
|
+
for (const peer of set) {
|
|
2438
|
+
if (peer === selfHash) {
|
|
2439
|
+
reachable.push(peer);
|
|
2440
|
+
continue;
|
|
2441
|
+
}
|
|
2442
|
+
if (
|
|
2443
|
+
subscriberHashcodes
|
|
2444
|
+
? subscriberHashcodes.has(peer)
|
|
2445
|
+
: this.uniqueReplicators.has(peer)
|
|
2446
|
+
) {
|
|
2447
|
+
reachable.push(peer);
|
|
2448
|
+
}
|
|
2400
2449
|
}
|
|
2450
|
+
return reachable;
|
|
2401
2451
|
}
|
|
2402
|
-
return reachableSet;
|
|
2403
|
-
}
|
|
2404
2452
|
|
|
2405
2453
|
return [...set];
|
|
2406
2454
|
} catch (error) {
|
|
@@ -2844,22 +2892,20 @@ export class SharedLog<
|
|
|
2844
2892
|
context.from!.hashcode(),
|
|
2845
2893
|
);
|
|
2846
2894
|
} else if (msg instanceof RequestReplicationInfoMessage) {
|
|
2847
|
-
// TODO this message type is never used, should we remove it?
|
|
2848
|
-
|
|
2849
2895
|
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2850
2896
|
return;
|
|
2851
2897
|
}
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
x.toReplicationRange(),
|
|
2856
|
-
),
|
|
2857
|
-
}),
|
|
2858
|
-
{
|
|
2859
|
-
mode: new SilentDelivery({ to: [context.from], redundancy: 1 }),
|
|
2860
|
-
},
|
|
2898
|
+
|
|
2899
|
+
const segments = (await this.getMyReplicationSegments()).map((x) =>
|
|
2900
|
+
x.toReplicationRange(),
|
|
2861
2901
|
);
|
|
2862
2902
|
|
|
2903
|
+
this.rpc
|
|
2904
|
+
.send(new AllReplicatingSegmentsMessage({ segments }), {
|
|
2905
|
+
mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
|
|
2906
|
+
})
|
|
2907
|
+
.catch((e) => logger.error(e.toString()));
|
|
2908
|
+
|
|
2863
2909
|
// for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
|
|
2864
2910
|
if (this.v8Behaviour) {
|
|
2865
2911
|
const role = this.getRole();
|
|
@@ -2881,73 +2927,60 @@ export class SharedLog<
|
|
|
2881
2927
|
}
|
|
2882
2928
|
}
|
|
2883
2929
|
} else if (
|
|
2884
|
-
|
|
2885
|
-
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
let replicationInfoMessage = msg as
|
|
2892
|
-
| AllReplicatingSegmentsMessage
|
|
2893
|
-
| AddedReplicationSegmentMessage;
|
|
2894
|
-
|
|
2895
|
-
// we have this statement because peers might have changed/announced their role,
|
|
2896
|
-
// but we don't know them as "subscribers" yet. i.e. they are not online
|
|
2897
|
-
|
|
2898
|
-
this.waitFor(context.from, {
|
|
2899
|
-
signal: this._closeController.signal,
|
|
2900
|
-
timeout: this.waitForReplicatorTimeout,
|
|
2901
|
-
})
|
|
2902
|
-
.then(async () => {
|
|
2903
|
-
// do use an operation log here, because we want to make sure that we don't miss any updates
|
|
2904
|
-
// and do them in the right order
|
|
2905
|
-
const prev = this.latestReplicationInfoMessage.get(
|
|
2906
|
-
context.from!.hashcode(),
|
|
2907
|
-
);
|
|
2930
|
+
msg instanceof AllReplicatingSegmentsMessage ||
|
|
2931
|
+
msg instanceof AddedReplicationSegmentMessage
|
|
2932
|
+
) {
|
|
2933
|
+
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2934
|
+
return;
|
|
2935
|
+
}
|
|
2908
2936
|
|
|
2909
|
-
|
|
2937
|
+
const replicationInfoMessage = msg as
|
|
2938
|
+
| AllReplicatingSegmentsMessage
|
|
2939
|
+
| AddedReplicationSegmentMessage;
|
|
2940
|
+
|
|
2941
|
+
// Process replication updates even if the sender isn't yet considered "ready" by
|
|
2942
|
+
// `Program.waitFor()`. Dropping these messages can lead to missing replicator info
|
|
2943
|
+
// (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
|
|
2944
|
+
const from = context.from!;
|
|
2945
|
+
const messageTimestamp = context.message.header.timestamp;
|
|
2946
|
+
(async () => {
|
|
2947
|
+
const prev = this.latestReplicationInfoMessage.get(from.hashcode());
|
|
2948
|
+
if (prev && prev > messageTimestamp) {
|
|
2910
2949
|
return;
|
|
2911
2950
|
}
|
|
2912
2951
|
|
|
2913
|
-
this.latestReplicationInfoMessage.set(
|
|
2914
|
-
context.from!.hashcode(),
|
|
2915
|
-
context.message.header.timestamp,
|
|
2916
|
-
);
|
|
2917
|
-
|
|
2918
|
-
let reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2952
|
+
this.latestReplicationInfoMessage.set(from.hashcode(), messageTimestamp);
|
|
2919
2953
|
|
|
2920
2954
|
if (this.closed) {
|
|
2921
2955
|
return;
|
|
2922
2956
|
}
|
|
2923
2957
|
|
|
2958
|
+
const reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2924
2959
|
await this.addReplicationRange(
|
|
2925
2960
|
replicationInfoMessage.segments.map((x) =>
|
|
2926
|
-
x.toReplicationRangeIndexable(
|
|
2961
|
+
x.toReplicationRangeIndexable(from),
|
|
2927
2962
|
),
|
|
2928
|
-
|
|
2963
|
+
from,
|
|
2929
2964
|
{
|
|
2930
2965
|
reset,
|
|
2931
2966
|
checkDuplicates: true,
|
|
2932
|
-
timestamp: Number(
|
|
2967
|
+
timestamp: Number(messageTimestamp),
|
|
2933
2968
|
},
|
|
2934
2969
|
);
|
|
2935
|
-
|
|
2936
|
-
/* await this._modifyReplicators(msg.role, context.from!); */
|
|
2937
|
-
})
|
|
2938
|
-
.catch((e) => {
|
|
2970
|
+
})().catch((e) => {
|
|
2939
2971
|
if (isNotStartedError(e)) {
|
|
2940
2972
|
return;
|
|
2941
2973
|
}
|
|
2942
2974
|
logger.error(
|
|
2943
|
-
|
|
2944
|
-
e?.message
|
|
2975
|
+
`Failed to apply replication settings from '${from.hashcode()}': ${
|
|
2976
|
+
e?.message ?? e
|
|
2977
|
+
}`,
|
|
2945
2978
|
);
|
|
2946
2979
|
});
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2980
|
+
} else if (msg instanceof StoppedReplicating) {
|
|
2981
|
+
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2982
|
+
return;
|
|
2983
|
+
}
|
|
2951
2984
|
|
|
2952
2985
|
const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(
|
|
2953
2986
|
msg.segmentIds,
|
|
@@ -3140,25 +3173,29 @@ export class SharedLog<
|
|
|
3140
3173
|
},
|
|
3141
3174
|
): Promise<void> {
|
|
3142
3175
|
let entriesToReplicate: Entry<T>[] = [];
|
|
3143
|
-
if (options?.replicate) {
|
|
3176
|
+
if (options?.replicate && this.log.length > 0) {
|
|
3144
3177
|
// TODO this block should perhaps be called from a callback on the this.log.join method for every element that is ignored because it is already joined, like "onAlreadyJoined"
|
|
3145
3178
|
|
|
3146
3179
|
// check which entries we already have but are not replicating, and replicate them
|
|
3147
3180
|
// we can not just do the 'join' call because it will ignore the already joined entries
|
|
3148
3181
|
for (const element of entries) {
|
|
3149
3182
|
if (typeof element === "string") {
|
|
3150
|
-
|
|
3151
|
-
|
|
3152
|
-
|
|
3183
|
+
if (await this.log.has(element)) {
|
|
3184
|
+
const entry = await this.log.get(element);
|
|
3185
|
+
if (entry) {
|
|
3186
|
+
entriesToReplicate.push(entry);
|
|
3187
|
+
}
|
|
3153
3188
|
}
|
|
3154
3189
|
} else if (element instanceof Entry) {
|
|
3155
3190
|
if (await this.log.has(element.hash)) {
|
|
3156
3191
|
entriesToReplicate.push(element);
|
|
3157
3192
|
}
|
|
3158
3193
|
} else {
|
|
3159
|
-
|
|
3160
|
-
|
|
3161
|
-
|
|
3194
|
+
if (await this.log.has(element.hash)) {
|
|
3195
|
+
const entry = await this.log.get(element.hash);
|
|
3196
|
+
if (entry) {
|
|
3197
|
+
entriesToReplicate.push(entry);
|
|
3198
|
+
}
|
|
3162
3199
|
}
|
|
3163
3200
|
}
|
|
3164
3201
|
}
|
|
@@ -3277,6 +3314,7 @@ export class SharedLog<
|
|
|
3277
3314
|
|
|
3278
3315
|
let settled = false;
|
|
3279
3316
|
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
3317
|
+
let requestTimer: ReturnType<typeof setTimeout> | undefined;
|
|
3280
3318
|
|
|
3281
3319
|
const clear = () => {
|
|
3282
3320
|
this.events.removeEventListener("replicator:mature", check);
|
|
@@ -3286,6 +3324,10 @@ export class SharedLog<
|
|
|
3286
3324
|
clearTimeout(timer);
|
|
3287
3325
|
timer = undefined;
|
|
3288
3326
|
}
|
|
3327
|
+
if (requestTimer != null) {
|
|
3328
|
+
clearTimeout(requestTimer);
|
|
3329
|
+
requestTimer = undefined;
|
|
3330
|
+
}
|
|
3289
3331
|
};
|
|
3290
3332
|
|
|
3291
3333
|
const resolve = () => {
|
|
@@ -3317,6 +3359,40 @@ export class SharedLog<
|
|
|
3317
3359
|
);
|
|
3318
3360
|
}, timeoutMs);
|
|
3319
3361
|
|
|
3362
|
+
let requestAttempts = 0;
|
|
3363
|
+
const requestIntervalMs = 1000;
|
|
3364
|
+
const maxRequestAttempts = Math.max(
|
|
3365
|
+
3,
|
|
3366
|
+
Math.ceil(timeoutMs / requestIntervalMs),
|
|
3367
|
+
);
|
|
3368
|
+
|
|
3369
|
+
const requestReplicationInfo = () => {
|
|
3370
|
+
if (settled || this.closed) {
|
|
3371
|
+
return;
|
|
3372
|
+
}
|
|
3373
|
+
|
|
3374
|
+
if (requestAttempts >= maxRequestAttempts) {
|
|
3375
|
+
return;
|
|
3376
|
+
}
|
|
3377
|
+
|
|
3378
|
+
requestAttempts++;
|
|
3379
|
+
|
|
3380
|
+
this.rpc
|
|
3381
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3382
|
+
mode: new SeekDelivery({ redundancy: 1, to: [key] }),
|
|
3383
|
+
})
|
|
3384
|
+
.catch((e) => {
|
|
3385
|
+
// Best-effort: missing peers / unopened RPC should not fail the wait logic.
|
|
3386
|
+
if (isNotStartedError(e as Error)) {
|
|
3387
|
+
return;
|
|
3388
|
+
}
|
|
3389
|
+
});
|
|
3390
|
+
|
|
3391
|
+
if (requestAttempts < maxRequestAttempts) {
|
|
3392
|
+
requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
|
|
3393
|
+
}
|
|
3394
|
+
};
|
|
3395
|
+
|
|
3320
3396
|
const check = async () => {
|
|
3321
3397
|
const iterator = this.replicationIndex?.iterate(
|
|
3322
3398
|
{ query: new StringMatch({ key: "hash", value: key.hashcode() }) },
|
|
@@ -3341,6 +3417,7 @@ export class SharedLog<
|
|
|
3341
3417
|
}
|
|
3342
3418
|
};
|
|
3343
3419
|
|
|
3420
|
+
requestReplicationInfo();
|
|
3344
3421
|
check();
|
|
3345
3422
|
this.events.addEventListener("replicator:mature", check);
|
|
3346
3423
|
this.events.addEventListener("replication:change", check);
|
|
@@ -3590,27 +3667,54 @@ export class SharedLog<
|
|
|
3590
3667
|
return 0;
|
|
3591
3668
|
}
|
|
3592
3669
|
|
|
3593
|
-
|
|
3594
|
-
|
|
3595
|
-
|
|
3596
|
-
|
|
3597
|
-
?.length ?? 1);
|
|
3598
|
-
const diffToOldest =
|
|
3599
|
-
subscribers > 1 ? now - this.oldestOpenTime - 1 : Number.MAX_SAFE_INTEGER;
|
|
3600
|
-
|
|
3601
|
-
const result = Math.min(
|
|
3602
|
-
this.timeUntilRoleMaturity,
|
|
3603
|
-
Math.max(diffToOldest, this.timeUntilRoleMaturity),
|
|
3604
|
-
Math.max(
|
|
3605
|
-
Math.round(
|
|
3606
|
-
(this.timeUntilRoleMaturity * Math.log(subscribers + 1)) / 3,
|
|
3607
|
-
),
|
|
3608
|
-
this.timeUntilRoleMaturity,
|
|
3609
|
-
),
|
|
3610
|
-
); // / 3 so that if 2 replicators and timeUntilRoleMaturity = 1e4 the result will be 1
|
|
3670
|
+
// Explicitly disable maturity gating (used by many tests).
|
|
3671
|
+
if (this.timeUntilRoleMaturity <= 0) {
|
|
3672
|
+
return 0;
|
|
3673
|
+
}
|
|
3611
3674
|
|
|
3612
|
-
|
|
3613
|
-
|
|
3675
|
+
// If we're alone (or pubsub isn't ready), a fixed maturity time is sufficient.
|
|
3676
|
+
// When there are multiple replicators we want a stable threshold that doesn't
|
|
3677
|
+
// depend on "now" (otherwise it can drift and turn into a flake).
|
|
3678
|
+
let subscribers = 1;
|
|
3679
|
+
if (!this.rpc.closed) {
|
|
3680
|
+
try {
|
|
3681
|
+
subscribers =
|
|
3682
|
+
(await this.node.services.pubsub.getSubscribers(this.rpc.topic))
|
|
3683
|
+
?.length ?? 1;
|
|
3684
|
+
} catch {
|
|
3685
|
+
// Best-effort only; fall back to 1.
|
|
3686
|
+
}
|
|
3687
|
+
}
|
|
3688
|
+
|
|
3689
|
+
if (subscribers <= 1) {
|
|
3690
|
+
return this.timeUntilRoleMaturity;
|
|
3691
|
+
}
|
|
3692
|
+
|
|
3693
|
+
// Use replication range timestamps to compute a stable "age gap" between the
|
|
3694
|
+
// newest and oldest known roles. This keeps the oldest role mature while
|
|
3695
|
+
// preventing newer roles from being treated as mature purely because time
|
|
3696
|
+
// passes between test steps / network events.
|
|
3697
|
+
let newestOpenTime = this.openTime;
|
|
3698
|
+
try {
|
|
3699
|
+
const newestIterator = await this.replicationIndex.iterate(
|
|
3700
|
+
{
|
|
3701
|
+
sort: [new Sort({ key: "timestamp", direction: "desc" })],
|
|
3702
|
+
},
|
|
3703
|
+
{ shape: { timestamp: true }, reference: true },
|
|
3704
|
+
);
|
|
3705
|
+
const newestTimestampFromDB = (await newestIterator.next(1))[0]?.value
|
|
3706
|
+
.timestamp;
|
|
3707
|
+
await newestIterator.close();
|
|
3708
|
+
if (newestTimestampFromDB != null) {
|
|
3709
|
+
newestOpenTime = Number(newestTimestampFromDB);
|
|
3710
|
+
}
|
|
3711
|
+
} catch {
|
|
3712
|
+
// Best-effort only; fall back to local open time.
|
|
3713
|
+
}
|
|
3714
|
+
|
|
3715
|
+
const ageGapToOldest = newestOpenTime - this.oldestOpenTime;
|
|
3716
|
+
const roleAge = Math.max(this.timeUntilRoleMaturity, ageGapToOldest);
|
|
3717
|
+
return roleAge < 0 ? 0 : roleAge;
|
|
3614
3718
|
}
|
|
3615
3719
|
|
|
3616
3720
|
async findLeaders(
|
|
@@ -3689,13 +3793,37 @@ export class SharedLog<
|
|
|
3689
3793
|
},
|
|
3690
3794
|
): Promise<Map<string, { intersecting: boolean }>> {
|
|
3691
3795
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 is added so that if someone else is just as new as us, then we treat them as mature as us. Without -500 we might be slower syncing if two nodes start almost at the same time
|
|
3796
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
3797
|
+
|
|
3798
|
+
// Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
|
|
3799
|
+
// persisted replication index. Until then, fall back to live pubsub subscribers
|
|
3800
|
+
// and avoid relying on `uniqueReplicators` being complete.
|
|
3801
|
+
let peerFilter: Set<string> | undefined = undefined;
|
|
3802
|
+
if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
|
|
3803
|
+
peerFilter = this.uniqueReplicators.has(selfHash)
|
|
3804
|
+
? this.uniqueReplicators
|
|
3805
|
+
: new Set([...this.uniqueReplicators, selfHash]);
|
|
3806
|
+
} else {
|
|
3807
|
+
try {
|
|
3808
|
+
const subscribers =
|
|
3809
|
+
(await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
3810
|
+
undefined;
|
|
3811
|
+
if (subscribers && subscribers.length > 0) {
|
|
3812
|
+
peerFilter = new Set(subscribers.map((key) => key.hashcode()));
|
|
3813
|
+
peerFilter.add(selfHash);
|
|
3814
|
+
}
|
|
3815
|
+
} catch {
|
|
3816
|
+
// Best-effort only; if pubsub isn't ready, do a full scan.
|
|
3817
|
+
}
|
|
3818
|
+
}
|
|
3692
3819
|
return getSamples<R>(
|
|
3693
3820
|
cursors,
|
|
3694
3821
|
this.replicationIndex,
|
|
3695
3822
|
roleAge,
|
|
3696
3823
|
this.indexableDomain.numbers,
|
|
3697
3824
|
{
|
|
3698
|
-
|
|
3825
|
+
peerFilter,
|
|
3826
|
+
uniqueReplicators: peerFilter,
|
|
3699
3827
|
},
|
|
3700
3828
|
);
|
|
3701
3829
|
}
|
|
@@ -3750,7 +3878,7 @@ export class SharedLog<
|
|
|
3750
3878
|
for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
|
|
3751
3879
|
v.delete(publicKey.hashcode());
|
|
3752
3880
|
if (v.size === 0) {
|
|
3753
|
-
this.
|
|
3881
|
+
this._requestIPruneResponseReplicatorSet.delete(k);
|
|
3754
3882
|
}
|
|
3755
3883
|
}
|
|
3756
3884
|
|
|
@@ -3789,6 +3917,15 @@ export class SharedLog<
|
|
|
3789
3917
|
.catch((e) => logger.error(e.toString()));
|
|
3790
3918
|
}
|
|
3791
3919
|
}
|
|
3920
|
+
|
|
3921
|
+
// Request the remote peer's replication info. This makes joins resilient to
|
|
3922
|
+
// timing-sensitive delivery/order issues where we may miss their initial
|
|
3923
|
+
// replication announcement.
|
|
3924
|
+
this.rpc
|
|
3925
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3926
|
+
mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
|
|
3927
|
+
})
|
|
3928
|
+
.catch((e) => logger.error(e.toString()));
|
|
3792
3929
|
} else {
|
|
3793
3930
|
await this.removeReplicator(publicKey);
|
|
3794
3931
|
}
|
|
@@ -3948,11 +4085,28 @@ export class SharedLog<
|
|
|
3948
4085
|
|
|
3949
4086
|
let cursor: NumberFromType<R>[] | undefined = undefined;
|
|
3950
4087
|
|
|
3951
|
-
|
|
4088
|
+
// Checked prune requests can legitimately take longer than a fixed 10s:
|
|
4089
|
+
// - The remote may not have the entry yet and will wait up to `_respondToIHaveTimeout`
|
|
4090
|
+
// - Leadership/replicator information may take up to `waitForReplicatorTimeout` to settle
|
|
4091
|
+
// If we time out too early we can end up with permanently prunable heads that never
|
|
4092
|
+
// get retried (a common CI flake in "prune before join" tests).
|
|
4093
|
+
const checkedPruneTimeoutMs =
|
|
4094
|
+
options?.timeout ??
|
|
4095
|
+
Math.max(
|
|
4096
|
+
10_000,
|
|
4097
|
+
Number(this._respondToIHaveTimeout ?? 0) +
|
|
4098
|
+
this.waitForReplicatorTimeout +
|
|
4099
|
+
PRUNE_DEBOUNCE_INTERVAL * 2,
|
|
4100
|
+
);
|
|
4101
|
+
|
|
4102
|
+
const timeout = setTimeout(() => {
|
|
3952
4103
|
reject(
|
|
3953
|
-
new Error(
|
|
4104
|
+
new Error(
|
|
4105
|
+
`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`,
|
|
4106
|
+
),
|
|
3954
4107
|
);
|
|
3955
|
-
},
|
|
4108
|
+
}, checkedPruneTimeoutMs);
|
|
4109
|
+
timeout.unref?.();
|
|
3956
4110
|
|
|
3957
4111
|
this._pendingDeletes.set(entry.hash, {
|
|
3958
4112
|
promise: deferredPromise,
|
|
@@ -4100,8 +4254,13 @@ export class SharedLog<
|
|
|
4100
4254
|
);
|
|
4101
4255
|
}
|
|
4102
4256
|
|
|
4103
|
-
async waitForPruned(
|
|
4104
|
-
|
|
4257
|
+
async waitForPruned(options?: {
|
|
4258
|
+
timeout?: number;
|
|
4259
|
+
signal?: AbortSignal;
|
|
4260
|
+
delayInterval?: number;
|
|
4261
|
+
timeoutMessage?: string;
|
|
4262
|
+
}) {
|
|
4263
|
+
await waitFor(() => this._pendingDeletes.size === 0, options);
|
|
4105
4264
|
}
|
|
4106
4265
|
|
|
4107
4266
|
async onReplicationChange(
|
|
@@ -4280,11 +4439,24 @@ export class SharedLog<
|
|
|
4280
4439
|
cpuUsage: this.cpuUsage?.value(),
|
|
4281
4440
|
});
|
|
4282
4441
|
|
|
4442
|
+
const absoluteDifference = Math.abs(dynamicRange.widthNormalized - newFactor);
|
|
4283
4443
|
const relativeDifference =
|
|
4284
|
-
|
|
4285
|
-
|
|
4444
|
+
absoluteDifference /
|
|
4445
|
+
Math.max(
|
|
4446
|
+
dynamicRange.widthNormalized,
|
|
4447
|
+
RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR,
|
|
4448
|
+
);
|
|
4449
|
+
|
|
4450
|
+
let minRelativeChange = RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE;
|
|
4451
|
+
if (this.replicationController.maxMemoryLimit != null) {
|
|
4452
|
+
minRelativeChange =
|
|
4453
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT;
|
|
4454
|
+
} else if (this.replicationController.maxCPUUsage != null) {
|
|
4455
|
+
minRelativeChange =
|
|
4456
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT;
|
|
4457
|
+
}
|
|
4286
4458
|
|
|
4287
|
-
if (relativeDifference >
|
|
4459
|
+
if (relativeDifference > minRelativeChange) {
|
|
4288
4460
|
// TODO cannot reuse the old range, since it will (potentially) affect the index because of side effects
|
|
4289
4461
|
dynamicRange = new this.indexableDomain.constructorRange({
|
|
4290
4462
|
offset: dynamicRange.start1,
|