@peerbit/shared-log 12.2.0-8c0b5fb → 12.2.0-90d77b6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/pid-convergence.d.ts +2 -0
- package/dist/benchmark/pid-convergence.d.ts.map +1 -0
- package/dist/benchmark/pid-convergence.js +138 -0
- package/dist/benchmark/pid-convergence.js.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js +15 -1
- package/dist/benchmark/rateless-iblt-sender-startsync.js.map +1 -1
- package/dist/benchmark/sync-catchup.d.ts +3 -0
- package/dist/benchmark/sync-catchup.d.ts.map +1 -0
- package/dist/benchmark/sync-catchup.js +109 -0
- package/dist/benchmark/sync-catchup.js.map +1 -0
- package/dist/src/index.d.ts +14 -2
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +256 -82
- package/dist/src/index.js.map +1 -1
- package/dist/src/ranges.d.ts +1 -0
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +48 -18
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +41 -18
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/package.json +18 -18
- package/src/index.ts +324 -125
- package/src/ranges.ts +97 -65
- package/src/sync/rateless-iblt.ts +37 -18
package/src/index.ts
CHANGED
|
@@ -366,6 +366,8 @@ export type SharedLogOptions<
|
|
|
366
366
|
syncronizer?: SynchronizerConstructor<R>;
|
|
367
367
|
timeUntilRoleMaturity?: number;
|
|
368
368
|
waitForReplicatorTimeout?: number;
|
|
369
|
+
waitForReplicatorRequestIntervalMs?: number;
|
|
370
|
+
waitForReplicatorRequestMaxAttempts?: number;
|
|
369
371
|
waitForPruneDelay?: number;
|
|
370
372
|
distributionDebounceTime?: number;
|
|
371
373
|
compatibility?: number;
|
|
@@ -376,11 +378,20 @@ export type SharedLogOptions<
|
|
|
376
378
|
export const DEFAULT_MIN_REPLICAS = 2;
|
|
377
379
|
export const WAIT_FOR_REPLICATOR_TIMEOUT = 9000;
|
|
378
380
|
export const WAIT_FOR_ROLE_MATURITY = 5000;
|
|
379
|
-
export const
|
|
381
|
+
export const WAIT_FOR_REPLICATOR_REQUEST_INTERVAL = 1000;
|
|
382
|
+
export const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;
|
|
383
|
+
// TODO(prune): Investigate if/when a non-zero prune delay is required for correctness
|
|
384
|
+
// (e.g. responsibility/replication-info message reordering in multi-peer scenarios).
|
|
385
|
+
// Prefer making pruning robust without timing-based heuristics.
|
|
386
|
+
export const WAIT_FOR_PRUNE_DELAY = 0;
|
|
380
387
|
const PRUNE_DEBOUNCE_INTERVAL = 500;
|
|
381
388
|
|
|
382
389
|
// DON'T SET THIS ANY LOWER: it makes the PID controller unstable, because the system does not respond fast enough to the controller's updates
|
|
383
390
|
const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
|
|
391
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
|
|
392
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
|
|
393
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
|
|
394
|
+
const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
|
|
384
395
|
|
|
385
396
|
const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
|
|
386
397
|
|
|
@@ -456,6 +467,7 @@ export class SharedLog<
|
|
|
456
467
|
private recentlyRebalanced!: Cache<string>;
|
|
457
468
|
|
|
458
469
|
uniqueReplicators!: Set<string>;
|
|
470
|
+
private _replicatorsReconciled!: boolean;
|
|
459
471
|
|
|
460
472
|
/* private _totalParticipation!: number; */
|
|
461
473
|
|
|
@@ -556,6 +568,8 @@ export class SharedLog<
|
|
|
556
568
|
|
|
557
569
|
timeUntilRoleMaturity!: number;
|
|
558
570
|
waitForReplicatorTimeout!: number;
|
|
571
|
+
waitForReplicatorRequestIntervalMs!: number;
|
|
572
|
+
waitForReplicatorRequestMaxAttempts?: number;
|
|
559
573
|
waitForPruneDelay!: number;
|
|
560
574
|
distributionDebounceTime!: number;
|
|
561
575
|
|
|
@@ -617,15 +631,6 @@ export class SharedLog<
|
|
|
617
631
|
) {
|
|
618
632
|
this.rebalanceParticipationDebounced = undefined;
|
|
619
633
|
|
|
620
|
-
// make the rebalancing to respect warmup time
|
|
621
|
-
let intervalTime = interval * 2;
|
|
622
|
-
let timeout = setTimeout(() => {
|
|
623
|
-
intervalTime = interval;
|
|
624
|
-
}, this.timeUntilRoleMaturity);
|
|
625
|
-
this._closeController.signal.addEventListener("abort", () => {
|
|
626
|
-
clearTimeout(timeout);
|
|
627
|
-
});
|
|
628
|
-
|
|
629
634
|
this.rebalanceParticipationDebounced = debounceFixedInterval(
|
|
630
635
|
() => this.rebalanceParticipation(),
|
|
631
636
|
/* Math.max(
|
|
@@ -635,7 +640,7 @@ export class SharedLog<
|
|
|
635
640
|
REBALANCE_DEBOUNCE_INTERVAL
|
|
636
641
|
)
|
|
637
642
|
) */
|
|
638
|
-
|
|
643
|
+
interval, // TODO make this dynamic on the number of replicators
|
|
639
644
|
);
|
|
640
645
|
}
|
|
641
646
|
|
|
@@ -1166,16 +1171,31 @@ export class SharedLog<
|
|
|
1166
1171
|
|
|
1167
1172
|
let prevCount = deleted.length;
|
|
1168
1173
|
|
|
1169
|
-
|
|
1174
|
+
const existingById = new Map(deleted.map((x) => [x.idString, x]));
|
|
1175
|
+
const hasSameRanges =
|
|
1176
|
+
deleted.length === ranges.length &&
|
|
1177
|
+
ranges.every((range) => {
|
|
1178
|
+
const existing = existingById.get(range.idString);
|
|
1179
|
+
return existing != null && existing.equalRange(range);
|
|
1180
|
+
});
|
|
1170
1181
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
})
|
|
1178
|
-
|
|
1182
|
+
// Avoid churn on repeated full-state announcements that don't change any
|
|
1183
|
+
// replication ranges. This prevents unnecessary `replication:change`
|
|
1184
|
+
// events and rebalancing cascades.
|
|
1185
|
+
if (hasSameRanges) {
|
|
1186
|
+
diffs = [];
|
|
1187
|
+
} else {
|
|
1188
|
+
await this.replicationIndex.del({ query: { hash: from.hashcode() } });
|
|
1189
|
+
|
|
1190
|
+
diffs = [
|
|
1191
|
+
...deleted.map((x) => {
|
|
1192
|
+
return { range: x, type: "removed" as const, timestamp };
|
|
1193
|
+
}),
|
|
1194
|
+
...ranges.map((x) => {
|
|
1195
|
+
return { range: x, type: "added" as const, timestamp };
|
|
1196
|
+
}),
|
|
1197
|
+
];
|
|
1198
|
+
}
|
|
1179
1199
|
|
|
1180
1200
|
isNewReplicator = prevCount === 0 && ranges.length > 0;
|
|
1181
1201
|
} else {
|
|
@@ -1198,16 +1218,20 @@ export class SharedLog<
|
|
|
1198
1218
|
}
|
|
1199
1219
|
}
|
|
1200
1220
|
|
|
1221
|
+
let prevCountForOwner: number | undefined = undefined;
|
|
1201
1222
|
if (existing.length === 0) {
|
|
1202
|
-
|
|
1223
|
+
prevCountForOwner = await this.replicationIndex.count({
|
|
1203
1224
|
query: new StringMatch({ key: "hash", value: from.hashcode() }),
|
|
1204
1225
|
});
|
|
1205
|
-
isNewReplicator =
|
|
1226
|
+
isNewReplicator = prevCountForOwner === 0;
|
|
1206
1227
|
} else {
|
|
1207
1228
|
isNewReplicator = false;
|
|
1208
1229
|
}
|
|
1209
1230
|
|
|
1210
|
-
if (
|
|
1231
|
+
if (
|
|
1232
|
+
checkDuplicates &&
|
|
1233
|
+
(existing.length > 0 || (prevCountForOwner ?? 0) > 0)
|
|
1234
|
+
) {
|
|
1211
1235
|
let deduplicated: ReplicationRangeIndexable<any>[] = [];
|
|
1212
1236
|
|
|
1213
1237
|
// TODO also deduplicate/de-overlap among the ranges that ought to be inserted?
|
|
@@ -1874,6 +1898,7 @@ export class SharedLog<
|
|
|
1874
1898
|
this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
|
|
1875
1899
|
|
|
1876
1900
|
this.uniqueReplicators = new Set();
|
|
1901
|
+
this._replicatorsReconciled = false;
|
|
1877
1902
|
|
|
1878
1903
|
this.openTime = +new Date();
|
|
1879
1904
|
this.oldestOpenTime = this.openTime;
|
|
@@ -1883,13 +1908,32 @@ export class SharedLog<
|
|
|
1883
1908
|
this.timeUntilRoleMaturity =
|
|
1884
1909
|
options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
|
|
1885
1910
|
this.waitForReplicatorTimeout =
|
|
1886
|
-
options?.waitForReplicatorTimeout
|
|
1887
|
-
this.
|
|
1911
|
+
options?.waitForReplicatorTimeout ?? WAIT_FOR_REPLICATOR_TIMEOUT;
|
|
1912
|
+
this.waitForReplicatorRequestIntervalMs =
|
|
1913
|
+
options?.waitForReplicatorRequestIntervalMs ??
|
|
1914
|
+
WAIT_FOR_REPLICATOR_REQUEST_INTERVAL;
|
|
1915
|
+
this.waitForReplicatorRequestMaxAttempts =
|
|
1916
|
+
options?.waitForReplicatorRequestMaxAttempts;
|
|
1917
|
+
this.waitForPruneDelay = options?.waitForPruneDelay ?? WAIT_FOR_PRUNE_DELAY;
|
|
1888
1918
|
|
|
1889
1919
|
if (this.waitForReplicatorTimeout < this.timeUntilRoleMaturity) {
|
|
1890
1920
|
this.waitForReplicatorTimeout = this.timeUntilRoleMaturity; // it does not make sense to expect a replicator to mature faster than it is reachable
|
|
1891
1921
|
}
|
|
1892
1922
|
|
|
1923
|
+
if (this.waitForReplicatorRequestIntervalMs <= 0) {
|
|
1924
|
+
throw new Error(
|
|
1925
|
+
"waitForReplicatorRequestIntervalMs must be a positive number",
|
|
1926
|
+
);
|
|
1927
|
+
}
|
|
1928
|
+
if (
|
|
1929
|
+
this.waitForReplicatorRequestMaxAttempts != null &&
|
|
1930
|
+
this.waitForReplicatorRequestMaxAttempts <= 0
|
|
1931
|
+
) {
|
|
1932
|
+
throw new Error(
|
|
1933
|
+
"waitForReplicatorRequestMaxAttempts must be a positive number",
|
|
1934
|
+
);
|
|
1935
|
+
}
|
|
1936
|
+
|
|
1893
1937
|
this._closeController = new AbortController();
|
|
1894
1938
|
this._isTrustedReplicator = options?.canReplicate;
|
|
1895
1939
|
this.keep = options?.keep;
|
|
@@ -2176,7 +2220,16 @@ export class SharedLog<
|
|
|
2176
2220
|
await super.afterOpen();
|
|
2177
2221
|
|
|
2178
2222
|
// We do this here because these calls require this.closed == false
|
|
2179
|
-
this.pruneOfflineReplicators()
|
|
2223
|
+
void this.pruneOfflineReplicators()
|
|
2224
|
+
.then(() => {
|
|
2225
|
+
this._replicatorsReconciled = true;
|
|
2226
|
+
})
|
|
2227
|
+
.catch((error) => {
|
|
2228
|
+
if (isNotStartedError(error as Error)) {
|
|
2229
|
+
return;
|
|
2230
|
+
}
|
|
2231
|
+
logger.error(error);
|
|
2232
|
+
});
|
|
2180
2233
|
|
|
2181
2234
|
await this.rebalanceParticipation();
|
|
2182
2235
|
|
|
@@ -2392,15 +2445,35 @@ export class SharedLog<
|
|
|
2392
2445
|
set.add(key);
|
|
2393
2446
|
}
|
|
2394
2447
|
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2448
|
+
if (options?.reachableOnly) {
|
|
2449
|
+
// Prefer the live pubsub subscriber set when filtering reachability.
|
|
2450
|
+
// `uniqueReplicators` is primarily driven by replication messages and can lag during
|
|
2451
|
+
// joins/restarts; using subscribers prevents excluding peers that are reachable but
|
|
2452
|
+
// whose replication ranges were loaded from disk or haven't been processed yet.
|
|
2453
|
+
const subscribers =
|
|
2454
|
+
(await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
2455
|
+
undefined;
|
|
2456
|
+
const subscriberHashcodes = subscribers
|
|
2457
|
+
? new Set(subscribers.map((key) => key.hashcode()))
|
|
2458
|
+
: undefined;
|
|
2459
|
+
|
|
2460
|
+
const reachable: string[] = [];
|
|
2461
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2462
|
+
for (const peer of set) {
|
|
2463
|
+
if (peer === selfHash) {
|
|
2464
|
+
reachable.push(peer);
|
|
2465
|
+
continue;
|
|
2466
|
+
}
|
|
2467
|
+
if (
|
|
2468
|
+
subscriberHashcodes
|
|
2469
|
+
? subscriberHashcodes.has(peer)
|
|
2470
|
+
: this.uniqueReplicators.has(peer)
|
|
2471
|
+
) {
|
|
2472
|
+
reachable.push(peer);
|
|
2473
|
+
}
|
|
2400
2474
|
}
|
|
2475
|
+
return reachable;
|
|
2401
2476
|
}
|
|
2402
|
-
return reachableSet;
|
|
2403
|
-
}
|
|
2404
2477
|
|
|
2405
2478
|
return [...set];
|
|
2406
2479
|
} catch (error) {
|
|
@@ -2844,22 +2917,20 @@ export class SharedLog<
|
|
|
2844
2917
|
context.from!.hashcode(),
|
|
2845
2918
|
);
|
|
2846
2919
|
} else if (msg instanceof RequestReplicationInfoMessage) {
|
|
2847
|
-
// TODO this message type is never used, should we remove it?
|
|
2848
|
-
|
|
2849
2920
|
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2850
2921
|
return;
|
|
2851
2922
|
}
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
x.toReplicationRange(),
|
|
2856
|
-
),
|
|
2857
|
-
}),
|
|
2858
|
-
{
|
|
2859
|
-
mode: new SilentDelivery({ to: [context.from], redundancy: 1 }),
|
|
2860
|
-
},
|
|
2923
|
+
|
|
2924
|
+
const segments = (await this.getMyReplicationSegments()).map((x) =>
|
|
2925
|
+
x.toReplicationRange(),
|
|
2861
2926
|
);
|
|
2862
2927
|
|
|
2928
|
+
this.rpc
|
|
2929
|
+
.send(new AllReplicatingSegmentsMessage({ segments }), {
|
|
2930
|
+
mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
|
|
2931
|
+
})
|
|
2932
|
+
.catch((e) => logger.error(e.toString()));
|
|
2933
|
+
|
|
2863
2934
|
// for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
|
|
2864
2935
|
if (this.v8Behaviour) {
|
|
2865
2936
|
const role = this.getRole();
|
|
@@ -2881,73 +2952,60 @@ export class SharedLog<
|
|
|
2881
2952
|
}
|
|
2882
2953
|
}
|
|
2883
2954
|
} else if (
|
|
2884
|
-
|
|
2885
|
-
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
let replicationInfoMessage = msg as
|
|
2892
|
-
| AllReplicatingSegmentsMessage
|
|
2893
|
-
| AddedReplicationSegmentMessage;
|
|
2894
|
-
|
|
2895
|
-
// we have this statement because peers might have changed/announced their role,
|
|
2896
|
-
// but we don't know them as "subscribers" yet. i.e. they are not online
|
|
2897
|
-
|
|
2898
|
-
this.waitFor(context.from, {
|
|
2899
|
-
signal: this._closeController.signal,
|
|
2900
|
-
timeout: this.waitForReplicatorTimeout,
|
|
2901
|
-
})
|
|
2902
|
-
.then(async () => {
|
|
2903
|
-
// do use an operation log here, because we want to make sure that we don't miss any updates
|
|
2904
|
-
// and do them in the right order
|
|
2905
|
-
const prev = this.latestReplicationInfoMessage.get(
|
|
2906
|
-
context.from!.hashcode(),
|
|
2907
|
-
);
|
|
2955
|
+
msg instanceof AllReplicatingSegmentsMessage ||
|
|
2956
|
+
msg instanceof AddedReplicationSegmentMessage
|
|
2957
|
+
) {
|
|
2958
|
+
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2959
|
+
return;
|
|
2960
|
+
}
|
|
2908
2961
|
|
|
2909
|
-
|
|
2962
|
+
const replicationInfoMessage = msg as
|
|
2963
|
+
| AllReplicatingSegmentsMessage
|
|
2964
|
+
| AddedReplicationSegmentMessage;
|
|
2965
|
+
|
|
2966
|
+
// Process replication updates even if the sender isn't yet considered "ready" by
|
|
2967
|
+
// `Program.waitFor()`. Dropping these messages can lead to missing replicator info
|
|
2968
|
+
// (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
|
|
2969
|
+
const from = context.from!;
|
|
2970
|
+
const messageTimestamp = context.message.header.timestamp;
|
|
2971
|
+
(async () => {
|
|
2972
|
+
const prev = this.latestReplicationInfoMessage.get(from.hashcode());
|
|
2973
|
+
if (prev && prev > messageTimestamp) {
|
|
2910
2974
|
return;
|
|
2911
2975
|
}
|
|
2912
2976
|
|
|
2913
|
-
this.latestReplicationInfoMessage.set(
|
|
2914
|
-
context.from!.hashcode(),
|
|
2915
|
-
context.message.header.timestamp,
|
|
2916
|
-
);
|
|
2917
|
-
|
|
2918
|
-
let reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2977
|
+
this.latestReplicationInfoMessage.set(from.hashcode(), messageTimestamp);
|
|
2919
2978
|
|
|
2920
2979
|
if (this.closed) {
|
|
2921
2980
|
return;
|
|
2922
2981
|
}
|
|
2923
2982
|
|
|
2983
|
+
const reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2924
2984
|
await this.addReplicationRange(
|
|
2925
2985
|
replicationInfoMessage.segments.map((x) =>
|
|
2926
|
-
x.toReplicationRangeIndexable(
|
|
2986
|
+
x.toReplicationRangeIndexable(from),
|
|
2927
2987
|
),
|
|
2928
|
-
|
|
2988
|
+
from,
|
|
2929
2989
|
{
|
|
2930
2990
|
reset,
|
|
2931
2991
|
checkDuplicates: true,
|
|
2932
|
-
timestamp: Number(
|
|
2992
|
+
timestamp: Number(messageTimestamp),
|
|
2933
2993
|
},
|
|
2934
2994
|
);
|
|
2935
|
-
|
|
2936
|
-
/* await this._modifyReplicators(msg.role, context.from!); */
|
|
2937
|
-
})
|
|
2938
|
-
.catch((e) => {
|
|
2995
|
+
})().catch((e) => {
|
|
2939
2996
|
if (isNotStartedError(e)) {
|
|
2940
2997
|
return;
|
|
2941
2998
|
}
|
|
2942
2999
|
logger.error(
|
|
2943
|
-
|
|
2944
|
-
e?.message
|
|
3000
|
+
`Failed to apply replication settings from '${from.hashcode()}': ${
|
|
3001
|
+
e?.message ?? e
|
|
3002
|
+
}`,
|
|
2945
3003
|
);
|
|
2946
3004
|
});
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
3005
|
+
} else if (msg instanceof StoppedReplicating) {
|
|
3006
|
+
if (context.from.equals(this.node.identity.publicKey)) {
|
|
3007
|
+
return;
|
|
3008
|
+
}
|
|
2951
3009
|
|
|
2952
3010
|
const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(
|
|
2953
3011
|
msg.segmentIds,
|
|
@@ -3140,25 +3198,29 @@ export class SharedLog<
|
|
|
3140
3198
|
},
|
|
3141
3199
|
): Promise<void> {
|
|
3142
3200
|
let entriesToReplicate: Entry<T>[] = [];
|
|
3143
|
-
if (options?.replicate) {
|
|
3201
|
+
if (options?.replicate && this.log.length > 0) {
|
|
3144
3202
|
// TODO this block should perhaps be invoked from a callback of the this.log.join method for every element that is ignored because it was already joined, e.g. "onAlreadyJoined"
|
|
3145
3203
|
|
|
3146
3204
|
// check which entries we already have but are not replicating, and replicate them
|
|
3147
3205
|
// we can not just do the 'join' call because it will ignore the already joined entries
|
|
3148
3206
|
for (const element of entries) {
|
|
3149
3207
|
if (typeof element === "string") {
|
|
3150
|
-
|
|
3151
|
-
|
|
3152
|
-
|
|
3208
|
+
if (await this.log.has(element)) {
|
|
3209
|
+
const entry = await this.log.get(element);
|
|
3210
|
+
if (entry) {
|
|
3211
|
+
entriesToReplicate.push(entry);
|
|
3212
|
+
}
|
|
3153
3213
|
}
|
|
3154
3214
|
} else if (element instanceof Entry) {
|
|
3155
3215
|
if (await this.log.has(element.hash)) {
|
|
3156
3216
|
entriesToReplicate.push(element);
|
|
3157
3217
|
}
|
|
3158
3218
|
} else {
|
|
3159
|
-
|
|
3160
|
-
|
|
3161
|
-
|
|
3219
|
+
if (await this.log.has(element.hash)) {
|
|
3220
|
+
const entry = await this.log.get(element.hash);
|
|
3221
|
+
if (entry) {
|
|
3222
|
+
entriesToReplicate.push(entry);
|
|
3223
|
+
}
|
|
3162
3224
|
}
|
|
3163
3225
|
}
|
|
3164
3226
|
}
|
|
@@ -3277,6 +3339,7 @@ export class SharedLog<
|
|
|
3277
3339
|
|
|
3278
3340
|
let settled = false;
|
|
3279
3341
|
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
3342
|
+
let requestTimer: ReturnType<typeof setTimeout> | undefined;
|
|
3280
3343
|
|
|
3281
3344
|
const clear = () => {
|
|
3282
3345
|
this.events.removeEventListener("replicator:mature", check);
|
|
@@ -3286,6 +3349,10 @@ export class SharedLog<
|
|
|
3286
3349
|
clearTimeout(timer);
|
|
3287
3350
|
timer = undefined;
|
|
3288
3351
|
}
|
|
3352
|
+
if (requestTimer != null) {
|
|
3353
|
+
clearTimeout(requestTimer);
|
|
3354
|
+
requestTimer = undefined;
|
|
3355
|
+
}
|
|
3289
3356
|
};
|
|
3290
3357
|
|
|
3291
3358
|
const resolve = () => {
|
|
@@ -3317,6 +3384,42 @@ export class SharedLog<
|
|
|
3317
3384
|
);
|
|
3318
3385
|
}, timeoutMs);
|
|
3319
3386
|
|
|
3387
|
+
let requestAttempts = 0;
|
|
3388
|
+
const requestIntervalMs = this.waitForReplicatorRequestIntervalMs;
|
|
3389
|
+
const maxRequestAttempts =
|
|
3390
|
+
this.waitForReplicatorRequestMaxAttempts ??
|
|
3391
|
+
Math.max(
|
|
3392
|
+
WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS,
|
|
3393
|
+
Math.ceil(timeoutMs / requestIntervalMs),
|
|
3394
|
+
);
|
|
3395
|
+
|
|
3396
|
+
const requestReplicationInfo = () => {
|
|
3397
|
+
if (settled || this.closed) {
|
|
3398
|
+
return;
|
|
3399
|
+
}
|
|
3400
|
+
|
|
3401
|
+
if (requestAttempts >= maxRequestAttempts) {
|
|
3402
|
+
return;
|
|
3403
|
+
}
|
|
3404
|
+
|
|
3405
|
+
requestAttempts++;
|
|
3406
|
+
|
|
3407
|
+
this.rpc
|
|
3408
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3409
|
+
mode: new SeekDelivery({ redundancy: 1, to: [key] }),
|
|
3410
|
+
})
|
|
3411
|
+
.catch((e) => {
|
|
3412
|
+
// Best-effort: missing peers / unopened RPC should not fail the wait logic.
|
|
3413
|
+
if (isNotStartedError(e as Error)) {
|
|
3414
|
+
return;
|
|
3415
|
+
}
|
|
3416
|
+
});
|
|
3417
|
+
|
|
3418
|
+
if (requestAttempts < maxRequestAttempts) {
|
|
3419
|
+
requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
|
|
3420
|
+
}
|
|
3421
|
+
};
|
|
3422
|
+
|
|
3320
3423
|
const check = async () => {
|
|
3321
3424
|
const iterator = this.replicationIndex?.iterate(
|
|
3322
3425
|
{ query: new StringMatch({ key: "hash", value: key.hashcode() }) },
|
|
@@ -3341,6 +3444,7 @@ export class SharedLog<
|
|
|
3341
3444
|
}
|
|
3342
3445
|
};
|
|
3343
3446
|
|
|
3447
|
+
requestReplicationInfo();
|
|
3344
3448
|
check();
|
|
3345
3449
|
this.events.addEventListener("replicator:mature", check);
|
|
3346
3450
|
this.events.addEventListener("replication:change", check);
|
|
@@ -3590,27 +3694,54 @@ export class SharedLog<
|
|
|
3590
3694
|
return 0;
|
|
3591
3695
|
}
|
|
3592
3696
|
|
|
3593
|
-
|
|
3594
|
-
|
|
3595
|
-
|
|
3596
|
-
|
|
3597
|
-
?.length ?? 1);
|
|
3598
|
-
const diffToOldest =
|
|
3599
|
-
subscribers > 1 ? now - this.oldestOpenTime - 1 : Number.MAX_SAFE_INTEGER;
|
|
3600
|
-
|
|
3601
|
-
const result = Math.min(
|
|
3602
|
-
this.timeUntilRoleMaturity,
|
|
3603
|
-
Math.max(diffToOldest, this.timeUntilRoleMaturity),
|
|
3604
|
-
Math.max(
|
|
3605
|
-
Math.round(
|
|
3606
|
-
(this.timeUntilRoleMaturity * Math.log(subscribers + 1)) / 3,
|
|
3607
|
-
),
|
|
3608
|
-
this.timeUntilRoleMaturity,
|
|
3609
|
-
),
|
|
3610
|
-
); // / 3 so that if 2 replicators and timeUntilRoleMaturity = 1e4 the result will be 1
|
|
3697
|
+
// Explicitly disable maturity gating (used by many tests).
|
|
3698
|
+
if (this.timeUntilRoleMaturity <= 0) {
|
|
3699
|
+
return 0;
|
|
3700
|
+
}
|
|
3611
3701
|
|
|
3612
|
-
|
|
3613
|
-
|
|
3702
|
+
// If we're alone (or pubsub isn't ready), a fixed maturity time is sufficient.
|
|
3703
|
+
// When there are multiple replicators we want a stable threshold that doesn't
|
|
3704
|
+
// depend on "now" (otherwise it can drift and turn into a flake).
|
|
3705
|
+
let subscribers = 1;
|
|
3706
|
+
if (!this.rpc.closed) {
|
|
3707
|
+
try {
|
|
3708
|
+
subscribers =
|
|
3709
|
+
(await this.node.services.pubsub.getSubscribers(this.rpc.topic))
|
|
3710
|
+
?.length ?? 1;
|
|
3711
|
+
} catch {
|
|
3712
|
+
// Best-effort only; fall back to 1.
|
|
3713
|
+
}
|
|
3714
|
+
}
|
|
3715
|
+
|
|
3716
|
+
if (subscribers <= 1) {
|
|
3717
|
+
return this.timeUntilRoleMaturity;
|
|
3718
|
+
}
|
|
3719
|
+
|
|
3720
|
+
// Use replication range timestamps to compute a stable "age gap" between the
|
|
3721
|
+
// newest and oldest known roles. This keeps the oldest role mature while
|
|
3722
|
+
// preventing newer roles from being treated as mature purely because time
|
|
3723
|
+
// passes between test steps / network events.
|
|
3724
|
+
let newestOpenTime = this.openTime;
|
|
3725
|
+
try {
|
|
3726
|
+
const newestIterator = await this.replicationIndex.iterate(
|
|
3727
|
+
{
|
|
3728
|
+
sort: [new Sort({ key: "timestamp", direction: "desc" })],
|
|
3729
|
+
},
|
|
3730
|
+
{ shape: { timestamp: true }, reference: true },
|
|
3731
|
+
);
|
|
3732
|
+
const newestTimestampFromDB = (await newestIterator.next(1))[0]?.value
|
|
3733
|
+
.timestamp;
|
|
3734
|
+
await newestIterator.close();
|
|
3735
|
+
if (newestTimestampFromDB != null) {
|
|
3736
|
+
newestOpenTime = Number(newestTimestampFromDB);
|
|
3737
|
+
}
|
|
3738
|
+
} catch {
|
|
3739
|
+
// Best-effort only; fall back to local open time.
|
|
3740
|
+
}
|
|
3741
|
+
|
|
3742
|
+
const ageGapToOldest = newestOpenTime - this.oldestOpenTime;
|
|
3743
|
+
const roleAge = Math.max(this.timeUntilRoleMaturity, ageGapToOldest);
|
|
3744
|
+
return roleAge < 0 ? 0 : roleAge;
|
|
3614
3745
|
}
|
|
3615
3746
|
|
|
3616
3747
|
async findLeaders(
|
|
@@ -3689,13 +3820,37 @@ export class SharedLog<
|
|
|
3689
3820
|
},
|
|
3690
3821
|
): Promise<Map<string, { intersecting: boolean }>> {
|
|
3691
3822
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
|
|
3823
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
3824
|
+
|
|
3825
|
+
// Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
|
|
3826
|
+
// persisted replication index. Until then, fall back to live pubsub subscribers
|
|
3827
|
+
// and avoid relying on `uniqueReplicators` being complete.
|
|
3828
|
+
let peerFilter: Set<string> | undefined = undefined;
|
|
3829
|
+
if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
|
|
3830
|
+
peerFilter = this.uniqueReplicators.has(selfHash)
|
|
3831
|
+
? this.uniqueReplicators
|
|
3832
|
+
: new Set([...this.uniqueReplicators, selfHash]);
|
|
3833
|
+
} else {
|
|
3834
|
+
try {
|
|
3835
|
+
const subscribers =
|
|
3836
|
+
(await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
3837
|
+
undefined;
|
|
3838
|
+
if (subscribers && subscribers.length > 0) {
|
|
3839
|
+
peerFilter = new Set(subscribers.map((key) => key.hashcode()));
|
|
3840
|
+
peerFilter.add(selfHash);
|
|
3841
|
+
}
|
|
3842
|
+
} catch {
|
|
3843
|
+
// Best-effort only; if pubsub isn't ready, do a full scan.
|
|
3844
|
+
}
|
|
3845
|
+
}
|
|
3692
3846
|
return getSamples<R>(
|
|
3693
3847
|
cursors,
|
|
3694
3848
|
this.replicationIndex,
|
|
3695
3849
|
roleAge,
|
|
3696
3850
|
this.indexableDomain.numbers,
|
|
3697
3851
|
{
|
|
3698
|
-
|
|
3852
|
+
peerFilter,
|
|
3853
|
+
uniqueReplicators: peerFilter,
|
|
3699
3854
|
},
|
|
3700
3855
|
);
|
|
3701
3856
|
}
|
|
@@ -3750,7 +3905,7 @@ export class SharedLog<
|
|
|
3750
3905
|
for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
|
|
3751
3906
|
v.delete(publicKey.hashcode());
|
|
3752
3907
|
if (v.size === 0) {
|
|
3753
|
-
this.
|
|
3908
|
+
this._requestIPruneResponseReplicatorSet.delete(k);
|
|
3754
3909
|
}
|
|
3755
3910
|
}
|
|
3756
3911
|
|
|
@@ -3789,6 +3944,15 @@ export class SharedLog<
|
|
|
3789
3944
|
.catch((e) => logger.error(e.toString()));
|
|
3790
3945
|
}
|
|
3791
3946
|
}
|
|
3947
|
+
|
|
3948
|
+
// Request the remote peer's replication info. This makes joins resilient to
|
|
3949
|
+
// timing-sensitive delivery/order issues where we may miss their initial
|
|
3950
|
+
// replication announcement.
|
|
3951
|
+
this.rpc
|
|
3952
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3953
|
+
mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
|
|
3954
|
+
})
|
|
3955
|
+
.catch((e) => logger.error(e.toString()));
|
|
3792
3956
|
} else {
|
|
3793
3957
|
await this.removeReplicator(publicKey);
|
|
3794
3958
|
}
|
|
@@ -3948,11 +4112,28 @@ export class SharedLog<
|
|
|
3948
4112
|
|
|
3949
4113
|
let cursor: NumberFromType<R>[] | undefined = undefined;
|
|
3950
4114
|
|
|
3951
|
-
|
|
4115
|
+
// Checked prune requests can legitimately take longer than a fixed 10s:
|
|
4116
|
+
// - The remote may not have the entry yet and will wait up to `_respondToIHaveTimeout`
|
|
4117
|
+
// - Leadership/replicator information may take up to `waitForReplicatorTimeout` to settle
|
|
4118
|
+
// If we time out too early we can end up with permanently prunable heads that never
|
|
4119
|
+
// get retried (a common CI flake in "prune before join" tests).
|
|
4120
|
+
const checkedPruneTimeoutMs =
|
|
4121
|
+
options?.timeout ??
|
|
4122
|
+
Math.max(
|
|
4123
|
+
10_000,
|
|
4124
|
+
Number(this._respondToIHaveTimeout ?? 0) +
|
|
4125
|
+
this.waitForReplicatorTimeout +
|
|
4126
|
+
PRUNE_DEBOUNCE_INTERVAL * 2,
|
|
4127
|
+
);
|
|
4128
|
+
|
|
4129
|
+
const timeout = setTimeout(() => {
|
|
3952
4130
|
reject(
|
|
3953
|
-
new Error(
|
|
4131
|
+
new Error(
|
|
4132
|
+
`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`,
|
|
4133
|
+
),
|
|
3954
4134
|
);
|
|
3955
|
-
},
|
|
4135
|
+
}, checkedPruneTimeoutMs);
|
|
4136
|
+
timeout.unref?.();
|
|
3956
4137
|
|
|
3957
4138
|
this._pendingDeletes.set(entry.hash, {
|
|
3958
4139
|
promise: deferredPromise,
|
|
@@ -4100,8 +4281,13 @@ export class SharedLog<
|
|
|
4100
4281
|
);
|
|
4101
4282
|
}
|
|
4102
4283
|
|
|
4103
|
-
async waitForPruned(
|
|
4104
|
-
|
|
4284
|
+
async waitForPruned(options?: {
|
|
4285
|
+
timeout?: number;
|
|
4286
|
+
signal?: AbortSignal;
|
|
4287
|
+
delayInterval?: number;
|
|
4288
|
+
timeoutMessage?: string;
|
|
4289
|
+
}) {
|
|
4290
|
+
await waitFor(() => this._pendingDeletes.size === 0, options);
|
|
4105
4291
|
}
|
|
4106
4292
|
|
|
4107
4293
|
async onReplicationChange(
|
|
@@ -4280,11 +4466,24 @@ export class SharedLog<
|
|
|
4280
4466
|
cpuUsage: this.cpuUsage?.value(),
|
|
4281
4467
|
});
|
|
4282
4468
|
|
|
4469
|
+
const absoluteDifference = Math.abs(dynamicRange.widthNormalized - newFactor);
|
|
4283
4470
|
const relativeDifference =
|
|
4284
|
-
|
|
4285
|
-
|
|
4471
|
+
absoluteDifference /
|
|
4472
|
+
Math.max(
|
|
4473
|
+
dynamicRange.widthNormalized,
|
|
4474
|
+
RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR,
|
|
4475
|
+
);
|
|
4476
|
+
|
|
4477
|
+
let minRelativeChange = RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE;
|
|
4478
|
+
if (this.replicationController.maxMemoryLimit != null) {
|
|
4479
|
+
minRelativeChange =
|
|
4480
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT;
|
|
4481
|
+
} else if (this.replicationController.maxCPUUsage != null) {
|
|
4482
|
+
minRelativeChange =
|
|
4483
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT;
|
|
4484
|
+
}
|
|
4286
4485
|
|
|
4287
|
-
if (relativeDifference >
|
|
4486
|
+
if (relativeDifference > minRelativeChange) {
|
|
4288
4487
|
// TODO cannot reuse the old range, since doing so would (potentially) affect the index through side effects
|
|
4289
4488
|
dynamicRange = new this.indexableDomain.constructorRange({
|
|
4290
4489
|
offset: dynamicRange.start1,
|