@peerbit/shared-log 12.2.0-369b236 → 12.2.0-6aaa5dd
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/pid-convergence.d.ts +2 -0
- package/dist/benchmark/pid-convergence.d.ts.map +1 -0
- package/dist/benchmark/pid-convergence.js +138 -0
- package/dist/benchmark/pid-convergence.js.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts +2 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js +104 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js.map +1 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.d.ts +2 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.d.ts.map +1 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.js +112 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.js.map +1 -0
- package/dist/benchmark/sync-catchup.d.ts +3 -0
- package/dist/benchmark/sync-catchup.d.ts.map +1 -0
- package/dist/benchmark/sync-catchup.js +109 -0
- package/dist/benchmark/sync-catchup.js.map +1 -0
- package/dist/src/index.d.ts +10 -3
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +242 -82
- package/dist/src/index.js.map +1 -1
- package/dist/src/ranges.d.ts +1 -0
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +48 -18
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/index.d.ts +14 -0
- package/dist/src/sync/index.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts +14 -22
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +137 -22
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/dist/src/sync/simple.d.ts +3 -1
- package/dist/src/sync/simple.d.ts.map +1 -1
- package/dist/src/sync/simple.js +23 -1
- package/dist/src/sync/simple.js.map +1 -1
- package/package.json +18 -18
- package/src/index.ts +306 -126
- package/src/ranges.ts +97 -65
- package/src/sync/index.ts +19 -0
- package/src/sync/rateless-iblt.ts +187 -41
- package/src/sync/simple.ts +25 -2
package/src/index.ts
CHANGED
|
@@ -135,7 +135,11 @@ import {
|
|
|
135
135
|
maxReplicas,
|
|
136
136
|
} from "./replication.js";
|
|
137
137
|
import { Observer, Replicator } from "./role.js";
|
|
138
|
-
import type {
|
|
138
|
+
import type {
|
|
139
|
+
SyncOptions,
|
|
140
|
+
SynchronizerConstructor,
|
|
141
|
+
Syncronizer,
|
|
142
|
+
} from "./sync/index.js";
|
|
139
143
|
import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
|
|
140
144
|
import { SimpleSyncronizer } from "./sync/simple.js";
|
|
141
145
|
import { groupByGid } from "./utils.js";
|
|
@@ -358,6 +362,7 @@ export type SharedLogOptions<
|
|
|
358
362
|
keep?: (
|
|
359
363
|
entry: ShallowOrFullEntry<T> | EntryReplicated<R>,
|
|
360
364
|
) => Promise<boolean> | boolean;
|
|
365
|
+
sync?: SyncOptions<R>;
|
|
361
366
|
syncronizer?: SynchronizerConstructor<R>;
|
|
362
367
|
timeUntilRoleMaturity?: number;
|
|
363
368
|
waitForReplicatorTimeout?: number;
|
|
@@ -371,11 +376,18 @@ export type SharedLogOptions<
|
|
|
371
376
|
export const DEFAULT_MIN_REPLICAS = 2;
|
|
372
377
|
export const WAIT_FOR_REPLICATOR_TIMEOUT = 9000;
|
|
373
378
|
export const WAIT_FOR_ROLE_MATURITY = 5000;
|
|
374
|
-
|
|
379
|
+
// TODO(prune): Investigate if/when a non-zero prune delay is required for correctness
|
|
380
|
+
// (e.g. responsibility/replication-info message reordering in multi-peer scenarios).
|
|
381
|
+
// Prefer making pruning robust without timing-based heuristics.
|
|
382
|
+
export const WAIT_FOR_PRUNE_DELAY = 0;
|
|
375
383
|
const PRUNE_DEBOUNCE_INTERVAL = 500;
|
|
376
384
|
|
|
377
385
|
// DONT SET THIS ANY LOWER, because it will make the pid controller unstable as the system responses are not fast enough to updates from the pid controller
|
|
378
386
|
const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
|
|
387
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
|
|
388
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
|
|
389
|
+
const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
|
|
390
|
+
const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
|
|
379
391
|
|
|
380
392
|
const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
|
|
381
393
|
|
|
@@ -451,6 +463,7 @@ export class SharedLog<
|
|
|
451
463
|
private recentlyRebalanced!: Cache<string>;
|
|
452
464
|
|
|
453
465
|
uniqueReplicators!: Set<string>;
|
|
466
|
+
private _replicatorsReconciled!: boolean;
|
|
454
467
|
|
|
455
468
|
/* private _totalParticipation!: number; */
|
|
456
469
|
|
|
@@ -612,15 +625,6 @@ export class SharedLog<
|
|
|
612
625
|
) {
|
|
613
626
|
this.rebalanceParticipationDebounced = undefined;
|
|
614
627
|
|
|
615
|
-
// make the rebalancing to respect warmup time
|
|
616
|
-
let intervalTime = interval * 2;
|
|
617
|
-
let timeout = setTimeout(() => {
|
|
618
|
-
intervalTime = interval;
|
|
619
|
-
}, this.timeUntilRoleMaturity);
|
|
620
|
-
this._closeController.signal.addEventListener("abort", () => {
|
|
621
|
-
clearTimeout(timeout);
|
|
622
|
-
});
|
|
623
|
-
|
|
624
628
|
this.rebalanceParticipationDebounced = debounceFixedInterval(
|
|
625
629
|
() => this.rebalanceParticipation(),
|
|
626
630
|
/* Math.max(
|
|
@@ -630,7 +634,7 @@ export class SharedLog<
|
|
|
630
634
|
REBALANCE_DEBOUNCE_INTERVAL
|
|
631
635
|
)
|
|
632
636
|
) */
|
|
633
|
-
|
|
637
|
+
interval, // TODO make this dynamic on the number of replicators
|
|
634
638
|
);
|
|
635
639
|
}
|
|
636
640
|
|
|
@@ -1161,16 +1165,31 @@ export class SharedLog<
|
|
|
1161
1165
|
|
|
1162
1166
|
let prevCount = deleted.length;
|
|
1163
1167
|
|
|
1164
|
-
|
|
1168
|
+
const existingById = new Map(deleted.map((x) => [x.idString, x]));
|
|
1169
|
+
const hasSameRanges =
|
|
1170
|
+
deleted.length === ranges.length &&
|
|
1171
|
+
ranges.every((range) => {
|
|
1172
|
+
const existing = existingById.get(range.idString);
|
|
1173
|
+
return existing != null && existing.equalRange(range);
|
|
1174
|
+
});
|
|
1165
1175
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
})
|
|
1173
|
-
|
|
1176
|
+
// Avoid churn on repeated full-state announcements that don't change any
|
|
1177
|
+
// replication ranges. This prevents unnecessary `replication:change`
|
|
1178
|
+
// events and rebalancing cascades.
|
|
1179
|
+
if (hasSameRanges) {
|
|
1180
|
+
diffs = [];
|
|
1181
|
+
} else {
|
|
1182
|
+
await this.replicationIndex.del({ query: { hash: from.hashcode() } });
|
|
1183
|
+
|
|
1184
|
+
diffs = [
|
|
1185
|
+
...deleted.map((x) => {
|
|
1186
|
+
return { range: x, type: "removed" as const, timestamp };
|
|
1187
|
+
}),
|
|
1188
|
+
...ranges.map((x) => {
|
|
1189
|
+
return { range: x, type: "added" as const, timestamp };
|
|
1190
|
+
}),
|
|
1191
|
+
];
|
|
1192
|
+
}
|
|
1174
1193
|
|
|
1175
1194
|
isNewReplicator = prevCount === 0 && ranges.length > 0;
|
|
1176
1195
|
} else {
|
|
@@ -1193,16 +1212,20 @@ export class SharedLog<
|
|
|
1193
1212
|
}
|
|
1194
1213
|
}
|
|
1195
1214
|
|
|
1215
|
+
let prevCountForOwner: number | undefined = undefined;
|
|
1196
1216
|
if (existing.length === 0) {
|
|
1197
|
-
|
|
1217
|
+
prevCountForOwner = await this.replicationIndex.count({
|
|
1198
1218
|
query: new StringMatch({ key: "hash", value: from.hashcode() }),
|
|
1199
1219
|
});
|
|
1200
|
-
isNewReplicator =
|
|
1220
|
+
isNewReplicator = prevCountForOwner === 0;
|
|
1201
1221
|
} else {
|
|
1202
1222
|
isNewReplicator = false;
|
|
1203
1223
|
}
|
|
1204
1224
|
|
|
1205
|
-
if (
|
|
1225
|
+
if (
|
|
1226
|
+
checkDuplicates &&
|
|
1227
|
+
(existing.length > 0 || (prevCountForOwner ?? 0) > 0)
|
|
1228
|
+
) {
|
|
1206
1229
|
let deduplicated: ReplicationRangeIndexable<any>[] = [];
|
|
1207
1230
|
|
|
1208
1231
|
// TODO also deduplicate/de-overlap among the ranges that ought to be inserted?
|
|
@@ -1869,6 +1892,7 @@ export class SharedLog<
|
|
|
1869
1892
|
this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
|
|
1870
1893
|
|
|
1871
1894
|
this.uniqueReplicators = new Set();
|
|
1895
|
+
this._replicatorsReconciled = false;
|
|
1872
1896
|
|
|
1873
1897
|
this.openTime = +new Date();
|
|
1874
1898
|
this.oldestOpenTime = this.openTime;
|
|
@@ -1878,8 +1902,8 @@ export class SharedLog<
|
|
|
1878
1902
|
this.timeUntilRoleMaturity =
|
|
1879
1903
|
options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
|
|
1880
1904
|
this.waitForReplicatorTimeout =
|
|
1881
|
-
options?.waitForReplicatorTimeout
|
|
1882
|
-
this.waitForPruneDelay = options?.waitForPruneDelay
|
|
1905
|
+
options?.waitForReplicatorTimeout ?? WAIT_FOR_REPLICATOR_TIMEOUT;
|
|
1906
|
+
this.waitForPruneDelay = options?.waitForPruneDelay ?? WAIT_FOR_PRUNE_DELAY;
|
|
1883
1907
|
|
|
1884
1908
|
if (this.waitForReplicatorTimeout < this.timeUntilRoleMaturity) {
|
|
1885
1909
|
this.waitForReplicatorTimeout = this.timeUntilRoleMaturity; // does not makes sense to expect a replicator to mature faster than it is reachable
|
|
@@ -2043,6 +2067,7 @@ export class SharedLog<
|
|
|
2043
2067
|
rangeIndex: this._replicationRangeIndex,
|
|
2044
2068
|
rpc: this.rpc,
|
|
2045
2069
|
coordinateToHash: this.coordinateToHash,
|
|
2070
|
+
sync: options?.sync,
|
|
2046
2071
|
});
|
|
2047
2072
|
} else {
|
|
2048
2073
|
if (
|
|
@@ -2054,6 +2079,7 @@ export class SharedLog<
|
|
|
2054
2079
|
rpc: this.rpc,
|
|
2055
2080
|
entryIndex: this.entryCoordinatesIndex,
|
|
2056
2081
|
coordinateToHash: this.coordinateToHash,
|
|
2082
|
+
sync: options?.sync,
|
|
2057
2083
|
});
|
|
2058
2084
|
} else {
|
|
2059
2085
|
if (this.domain.resolution === "u32") {
|
|
@@ -2069,6 +2095,7 @@ export class SharedLog<
|
|
|
2069
2095
|
rangeIndex: this._replicationRangeIndex,
|
|
2070
2096
|
rpc: this.rpc,
|
|
2071
2097
|
coordinateToHash: this.coordinateToHash,
|
|
2098
|
+
sync: options?.sync,
|
|
2072
2099
|
}) as Syncronizer<R>;
|
|
2073
2100
|
}
|
|
2074
2101
|
}
|
|
@@ -2168,7 +2195,16 @@ export class SharedLog<
|
|
|
2168
2195
|
await super.afterOpen();
|
|
2169
2196
|
|
|
2170
2197
|
// We do this here, because these calls requires this.closed == false
|
|
2171
|
-
this.pruneOfflineReplicators()
|
|
2198
|
+
void this.pruneOfflineReplicators()
|
|
2199
|
+
.then(() => {
|
|
2200
|
+
this._replicatorsReconciled = true;
|
|
2201
|
+
})
|
|
2202
|
+
.catch((error) => {
|
|
2203
|
+
if (isNotStartedError(error as Error)) {
|
|
2204
|
+
return;
|
|
2205
|
+
}
|
|
2206
|
+
logger.error(error);
|
|
2207
|
+
});
|
|
2172
2208
|
|
|
2173
2209
|
await this.rebalanceParticipation();
|
|
2174
2210
|
|
|
@@ -2384,15 +2420,35 @@ export class SharedLog<
|
|
|
2384
2420
|
set.add(key);
|
|
2385
2421
|
}
|
|
2386
2422
|
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2423
|
+
if (options?.reachableOnly) {
|
|
2424
|
+
// Prefer the live pubsub subscriber set when filtering reachability.
|
|
2425
|
+
// `uniqueReplicators` is primarily driven by replication messages and can lag during
|
|
2426
|
+
// joins/restarts; using subscribers prevents excluding peers that are reachable but
|
|
2427
|
+
// whose replication ranges were loaded from disk or haven't been processed yet.
|
|
2428
|
+
const subscribers =
|
|
2429
|
+
(await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
2430
|
+
undefined;
|
|
2431
|
+
const subscriberHashcodes = subscribers
|
|
2432
|
+
? new Set(subscribers.map((key) => key.hashcode()))
|
|
2433
|
+
: undefined;
|
|
2434
|
+
|
|
2435
|
+
const reachable: string[] = [];
|
|
2436
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2437
|
+
for (const peer of set) {
|
|
2438
|
+
if (peer === selfHash) {
|
|
2439
|
+
reachable.push(peer);
|
|
2440
|
+
continue;
|
|
2441
|
+
}
|
|
2442
|
+
if (
|
|
2443
|
+
subscriberHashcodes
|
|
2444
|
+
? subscriberHashcodes.has(peer)
|
|
2445
|
+
: this.uniqueReplicators.has(peer)
|
|
2446
|
+
) {
|
|
2447
|
+
reachable.push(peer);
|
|
2448
|
+
}
|
|
2392
2449
|
}
|
|
2450
|
+
return reachable;
|
|
2393
2451
|
}
|
|
2394
|
-
return reachableSet;
|
|
2395
|
-
}
|
|
2396
2452
|
|
|
2397
2453
|
return [...set];
|
|
2398
2454
|
} catch (error) {
|
|
@@ -2836,22 +2892,20 @@ export class SharedLog<
|
|
|
2836
2892
|
context.from!.hashcode(),
|
|
2837
2893
|
);
|
|
2838
2894
|
} else if (msg instanceof RequestReplicationInfoMessage) {
|
|
2839
|
-
// TODO this message type is never used, should we remove it?
|
|
2840
|
-
|
|
2841
2895
|
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2842
2896
|
return;
|
|
2843
2897
|
}
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
x.toReplicationRange(),
|
|
2848
|
-
),
|
|
2849
|
-
}),
|
|
2850
|
-
{
|
|
2851
|
-
mode: new SilentDelivery({ to: [context.from], redundancy: 1 }),
|
|
2852
|
-
},
|
|
2898
|
+
|
|
2899
|
+
const segments = (await this.getMyReplicationSegments()).map((x) =>
|
|
2900
|
+
x.toReplicationRange(),
|
|
2853
2901
|
);
|
|
2854
2902
|
|
|
2903
|
+
this.rpc
|
|
2904
|
+
.send(new AllReplicatingSegmentsMessage({ segments }), {
|
|
2905
|
+
mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
|
|
2906
|
+
})
|
|
2907
|
+
.catch((e) => logger.error(e.toString()));
|
|
2908
|
+
|
|
2855
2909
|
// for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
|
|
2856
2910
|
if (this.v8Behaviour) {
|
|
2857
2911
|
const role = this.getRole();
|
|
@@ -2873,73 +2927,60 @@ export class SharedLog<
|
|
|
2873
2927
|
}
|
|
2874
2928
|
}
|
|
2875
2929
|
} else if (
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
2882
|
-
|
|
2883
|
-
let replicationInfoMessage = msg as
|
|
2884
|
-
| AllReplicatingSegmentsMessage
|
|
2885
|
-
| AddedReplicationSegmentMessage;
|
|
2886
|
-
|
|
2887
|
-
// we have this statement because peers might have changed/announced their role,
|
|
2888
|
-
// but we don't know them as "subscribers" yet. i.e. they are not online
|
|
2889
|
-
|
|
2890
|
-
this.waitFor(context.from, {
|
|
2891
|
-
signal: this._closeController.signal,
|
|
2892
|
-
timeout: this.waitForReplicatorTimeout,
|
|
2893
|
-
})
|
|
2894
|
-
.then(async () => {
|
|
2895
|
-
// do use an operation log here, because we want to make sure that we don't miss any updates
|
|
2896
|
-
// and do them in the right order
|
|
2897
|
-
const prev = this.latestReplicationInfoMessage.get(
|
|
2898
|
-
context.from!.hashcode(),
|
|
2899
|
-
);
|
|
2930
|
+
msg instanceof AllReplicatingSegmentsMessage ||
|
|
2931
|
+
msg instanceof AddedReplicationSegmentMessage
|
|
2932
|
+
) {
|
|
2933
|
+
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2934
|
+
return;
|
|
2935
|
+
}
|
|
2900
2936
|
|
|
2901
|
-
|
|
2937
|
+
const replicationInfoMessage = msg as
|
|
2938
|
+
| AllReplicatingSegmentsMessage
|
|
2939
|
+
| AddedReplicationSegmentMessage;
|
|
2940
|
+
|
|
2941
|
+
// Process replication updates even if the sender isn't yet considered "ready" by
|
|
2942
|
+
// `Program.waitFor()`. Dropping these messages can lead to missing replicator info
|
|
2943
|
+
// (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
|
|
2944
|
+
const from = context.from!;
|
|
2945
|
+
const messageTimestamp = context.message.header.timestamp;
|
|
2946
|
+
(async () => {
|
|
2947
|
+
const prev = this.latestReplicationInfoMessage.get(from.hashcode());
|
|
2948
|
+
if (prev && prev > messageTimestamp) {
|
|
2902
2949
|
return;
|
|
2903
2950
|
}
|
|
2904
2951
|
|
|
2905
|
-
this.latestReplicationInfoMessage.set(
|
|
2906
|
-
context.from!.hashcode(),
|
|
2907
|
-
context.message.header.timestamp,
|
|
2908
|
-
);
|
|
2909
|
-
|
|
2910
|
-
let reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2952
|
+
this.latestReplicationInfoMessage.set(from.hashcode(), messageTimestamp);
|
|
2911
2953
|
|
|
2912
2954
|
if (this.closed) {
|
|
2913
2955
|
return;
|
|
2914
2956
|
}
|
|
2915
2957
|
|
|
2958
|
+
const reset = msg instanceof AllReplicatingSegmentsMessage;
|
|
2916
2959
|
await this.addReplicationRange(
|
|
2917
2960
|
replicationInfoMessage.segments.map((x) =>
|
|
2918
|
-
x.toReplicationRangeIndexable(
|
|
2961
|
+
x.toReplicationRangeIndexable(from),
|
|
2919
2962
|
),
|
|
2920
|
-
|
|
2963
|
+
from,
|
|
2921
2964
|
{
|
|
2922
2965
|
reset,
|
|
2923
2966
|
checkDuplicates: true,
|
|
2924
|
-
timestamp: Number(
|
|
2967
|
+
timestamp: Number(messageTimestamp),
|
|
2925
2968
|
},
|
|
2926
2969
|
);
|
|
2927
|
-
|
|
2928
|
-
/* await this._modifyReplicators(msg.role, context.from!); */
|
|
2929
|
-
})
|
|
2930
|
-
.catch((e) => {
|
|
2970
|
+
})().catch((e) => {
|
|
2931
2971
|
if (isNotStartedError(e)) {
|
|
2932
2972
|
return;
|
|
2933
2973
|
}
|
|
2934
2974
|
logger.error(
|
|
2935
|
-
|
|
2936
|
-
e?.message
|
|
2975
|
+
`Failed to apply replication settings from '${from.hashcode()}': ${
|
|
2976
|
+
e?.message ?? e
|
|
2977
|
+
}`,
|
|
2937
2978
|
);
|
|
2938
2979
|
});
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
2942
|
-
|
|
2980
|
+
} else if (msg instanceof StoppedReplicating) {
|
|
2981
|
+
if (context.from.equals(this.node.identity.publicKey)) {
|
|
2982
|
+
return;
|
|
2983
|
+
}
|
|
2943
2984
|
|
|
2944
2985
|
const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(
|
|
2945
2986
|
msg.segmentIds,
|
|
@@ -3132,25 +3173,29 @@ export class SharedLog<
|
|
|
3132
3173
|
},
|
|
3133
3174
|
): Promise<void> {
|
|
3134
3175
|
let entriesToReplicate: Entry<T>[] = [];
|
|
3135
|
-
if (options?.replicate) {
|
|
3176
|
+
if (options?.replicate && this.log.length > 0) {
|
|
3136
3177
|
// TODO this block should perhaps be called from a callback on the this.log.join method on all the ignored element because already joined, like "onAlreadyJoined"
|
|
3137
3178
|
|
|
3138
3179
|
// check which entrise we already have but not are replicating, and replicate them
|
|
3139
3180
|
// we can not just do the 'join' call because it will ignore the already joined entries
|
|
3140
3181
|
for (const element of entries) {
|
|
3141
3182
|
if (typeof element === "string") {
|
|
3142
|
-
|
|
3143
|
-
|
|
3144
|
-
|
|
3183
|
+
if (await this.log.has(element)) {
|
|
3184
|
+
const entry = await this.log.get(element);
|
|
3185
|
+
if (entry) {
|
|
3186
|
+
entriesToReplicate.push(entry);
|
|
3187
|
+
}
|
|
3145
3188
|
}
|
|
3146
3189
|
} else if (element instanceof Entry) {
|
|
3147
3190
|
if (await this.log.has(element.hash)) {
|
|
3148
3191
|
entriesToReplicate.push(element);
|
|
3149
3192
|
}
|
|
3150
3193
|
} else {
|
|
3151
|
-
|
|
3152
|
-
|
|
3153
|
-
|
|
3194
|
+
if (await this.log.has(element.hash)) {
|
|
3195
|
+
const entry = await this.log.get(element.hash);
|
|
3196
|
+
if (entry) {
|
|
3197
|
+
entriesToReplicate.push(entry);
|
|
3198
|
+
}
|
|
3154
3199
|
}
|
|
3155
3200
|
}
|
|
3156
3201
|
}
|
|
@@ -3269,6 +3314,7 @@ export class SharedLog<
|
|
|
3269
3314
|
|
|
3270
3315
|
let settled = false;
|
|
3271
3316
|
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
3317
|
+
let requestTimer: ReturnType<typeof setTimeout> | undefined;
|
|
3272
3318
|
|
|
3273
3319
|
const clear = () => {
|
|
3274
3320
|
this.events.removeEventListener("replicator:mature", check);
|
|
@@ -3278,6 +3324,10 @@ export class SharedLog<
|
|
|
3278
3324
|
clearTimeout(timer);
|
|
3279
3325
|
timer = undefined;
|
|
3280
3326
|
}
|
|
3327
|
+
if (requestTimer != null) {
|
|
3328
|
+
clearTimeout(requestTimer);
|
|
3329
|
+
requestTimer = undefined;
|
|
3330
|
+
}
|
|
3281
3331
|
};
|
|
3282
3332
|
|
|
3283
3333
|
const resolve = () => {
|
|
@@ -3309,6 +3359,40 @@ export class SharedLog<
|
|
|
3309
3359
|
);
|
|
3310
3360
|
}, timeoutMs);
|
|
3311
3361
|
|
|
3362
|
+
let requestAttempts = 0;
|
|
3363
|
+
const requestIntervalMs = 1000;
|
|
3364
|
+
const maxRequestAttempts = Math.max(
|
|
3365
|
+
3,
|
|
3366
|
+
Math.ceil(timeoutMs / requestIntervalMs),
|
|
3367
|
+
);
|
|
3368
|
+
|
|
3369
|
+
const requestReplicationInfo = () => {
|
|
3370
|
+
if (settled || this.closed) {
|
|
3371
|
+
return;
|
|
3372
|
+
}
|
|
3373
|
+
|
|
3374
|
+
if (requestAttempts >= maxRequestAttempts) {
|
|
3375
|
+
return;
|
|
3376
|
+
}
|
|
3377
|
+
|
|
3378
|
+
requestAttempts++;
|
|
3379
|
+
|
|
3380
|
+
this.rpc
|
|
3381
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3382
|
+
mode: new SeekDelivery({ redundancy: 1, to: [key] }),
|
|
3383
|
+
})
|
|
3384
|
+
.catch((e) => {
|
|
3385
|
+
// Best-effort: missing peers / unopened RPC should not fail the wait logic.
|
|
3386
|
+
if (isNotStartedError(e as Error)) {
|
|
3387
|
+
return;
|
|
3388
|
+
}
|
|
3389
|
+
});
|
|
3390
|
+
|
|
3391
|
+
if (requestAttempts < maxRequestAttempts) {
|
|
3392
|
+
requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
|
|
3393
|
+
}
|
|
3394
|
+
};
|
|
3395
|
+
|
|
3312
3396
|
const check = async () => {
|
|
3313
3397
|
const iterator = this.replicationIndex?.iterate(
|
|
3314
3398
|
{ query: new StringMatch({ key: "hash", value: key.hashcode() }) },
|
|
@@ -3333,6 +3417,7 @@ export class SharedLog<
|
|
|
3333
3417
|
}
|
|
3334
3418
|
};
|
|
3335
3419
|
|
|
3420
|
+
requestReplicationInfo();
|
|
3336
3421
|
check();
|
|
3337
3422
|
this.events.addEventListener("replicator:mature", check);
|
|
3338
3423
|
this.events.addEventListener("replication:change", check);
|
|
@@ -3582,27 +3667,54 @@ export class SharedLog<
|
|
|
3582
3667
|
return 0;
|
|
3583
3668
|
}
|
|
3584
3669
|
|
|
3585
|
-
|
|
3586
|
-
|
|
3587
|
-
|
|
3588
|
-
|
|
3589
|
-
?.length ?? 1);
|
|
3590
|
-
const diffToOldest =
|
|
3591
|
-
subscribers > 1 ? now - this.oldestOpenTime - 1 : Number.MAX_SAFE_INTEGER;
|
|
3592
|
-
|
|
3593
|
-
const result = Math.min(
|
|
3594
|
-
this.timeUntilRoleMaturity,
|
|
3595
|
-
Math.max(diffToOldest, this.timeUntilRoleMaturity),
|
|
3596
|
-
Math.max(
|
|
3597
|
-
Math.round(
|
|
3598
|
-
(this.timeUntilRoleMaturity * Math.log(subscribers + 1)) / 3,
|
|
3599
|
-
),
|
|
3600
|
-
this.timeUntilRoleMaturity,
|
|
3601
|
-
),
|
|
3602
|
-
); // / 3 so that if 2 replicators and timeUntilRoleMaturity = 1e4 the result will be 1
|
|
3670
|
+
// Explicitly disable maturity gating (used by many tests).
|
|
3671
|
+
if (this.timeUntilRoleMaturity <= 0) {
|
|
3672
|
+
return 0;
|
|
3673
|
+
}
|
|
3603
3674
|
|
|
3604
|
-
|
|
3605
|
-
|
|
3675
|
+
// If we're alone (or pubsub isn't ready), a fixed maturity time is sufficient.
|
|
3676
|
+
// When there are multiple replicators we want a stable threshold that doesn't
|
|
3677
|
+
// depend on "now" (otherwise it can drift and turn into a flake).
|
|
3678
|
+
let subscribers = 1;
|
|
3679
|
+
if (!this.rpc.closed) {
|
|
3680
|
+
try {
|
|
3681
|
+
subscribers =
|
|
3682
|
+
(await this.node.services.pubsub.getSubscribers(this.rpc.topic))
|
|
3683
|
+
?.length ?? 1;
|
|
3684
|
+
} catch {
|
|
3685
|
+
// Best-effort only; fall back to 1.
|
|
3686
|
+
}
|
|
3687
|
+
}
|
|
3688
|
+
|
|
3689
|
+
if (subscribers <= 1) {
|
|
3690
|
+
return this.timeUntilRoleMaturity;
|
|
3691
|
+
}
|
|
3692
|
+
|
|
3693
|
+
// Use replication range timestamps to compute a stable "age gap" between the
|
|
3694
|
+
// newest and oldest known roles. This keeps the oldest role mature while
|
|
3695
|
+
// preventing newer roles from being treated as mature purely because time
|
|
3696
|
+
// passes between test steps / network events.
|
|
3697
|
+
let newestOpenTime = this.openTime;
|
|
3698
|
+
try {
|
|
3699
|
+
const newestIterator = await this.replicationIndex.iterate(
|
|
3700
|
+
{
|
|
3701
|
+
sort: [new Sort({ key: "timestamp", direction: "desc" })],
|
|
3702
|
+
},
|
|
3703
|
+
{ shape: { timestamp: true }, reference: true },
|
|
3704
|
+
);
|
|
3705
|
+
const newestTimestampFromDB = (await newestIterator.next(1))[0]?.value
|
|
3706
|
+
.timestamp;
|
|
3707
|
+
await newestIterator.close();
|
|
3708
|
+
if (newestTimestampFromDB != null) {
|
|
3709
|
+
newestOpenTime = Number(newestTimestampFromDB);
|
|
3710
|
+
}
|
|
3711
|
+
} catch {
|
|
3712
|
+
// Best-effort only; fall back to local open time.
|
|
3713
|
+
}
|
|
3714
|
+
|
|
3715
|
+
const ageGapToOldest = newestOpenTime - this.oldestOpenTime;
|
|
3716
|
+
const roleAge = Math.max(this.timeUntilRoleMaturity, ageGapToOldest);
|
|
3717
|
+
return roleAge < 0 ? 0 : roleAge;
|
|
3606
3718
|
}
|
|
3607
3719
|
|
|
3608
3720
|
async findLeaders(
|
|
@@ -3681,13 +3793,37 @@ export class SharedLog<
|
|
|
3681
3793
|
},
|
|
3682
3794
|
): Promise<Map<string, { intersecting: boolean }>> {
|
|
3683
3795
|
const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
|
|
3796
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
3797
|
+
|
|
3798
|
+
// Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
|
|
3799
|
+
// persisted replication index. Until then, fall back to live pubsub subscribers
|
|
3800
|
+
// and avoid relying on `uniqueReplicators` being complete.
|
|
3801
|
+
let peerFilter: Set<string> | undefined = undefined;
|
|
3802
|
+
if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
|
|
3803
|
+
peerFilter = this.uniqueReplicators.has(selfHash)
|
|
3804
|
+
? this.uniqueReplicators
|
|
3805
|
+
: new Set([...this.uniqueReplicators, selfHash]);
|
|
3806
|
+
} else {
|
|
3807
|
+
try {
|
|
3808
|
+
const subscribers =
|
|
3809
|
+
(await this.node.services.pubsub.getSubscribers(this.topic)) ??
|
|
3810
|
+
undefined;
|
|
3811
|
+
if (subscribers && subscribers.length > 0) {
|
|
3812
|
+
peerFilter = new Set(subscribers.map((key) => key.hashcode()));
|
|
3813
|
+
peerFilter.add(selfHash);
|
|
3814
|
+
}
|
|
3815
|
+
} catch {
|
|
3816
|
+
// Best-effort only; if pubsub isn't ready, do a full scan.
|
|
3817
|
+
}
|
|
3818
|
+
}
|
|
3684
3819
|
return getSamples<R>(
|
|
3685
3820
|
cursors,
|
|
3686
3821
|
this.replicationIndex,
|
|
3687
3822
|
roleAge,
|
|
3688
3823
|
this.indexableDomain.numbers,
|
|
3689
3824
|
{
|
|
3690
|
-
|
|
3825
|
+
peerFilter,
|
|
3826
|
+
uniqueReplicators: peerFilter,
|
|
3691
3827
|
},
|
|
3692
3828
|
);
|
|
3693
3829
|
}
|
|
@@ -3742,7 +3878,7 @@ export class SharedLog<
|
|
|
3742
3878
|
for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
|
|
3743
3879
|
v.delete(publicKey.hashcode());
|
|
3744
3880
|
if (v.size === 0) {
|
|
3745
|
-
this.
|
|
3881
|
+
this._requestIPruneResponseReplicatorSet.delete(k);
|
|
3746
3882
|
}
|
|
3747
3883
|
}
|
|
3748
3884
|
|
|
@@ -3781,6 +3917,15 @@ export class SharedLog<
|
|
|
3781
3917
|
.catch((e) => logger.error(e.toString()));
|
|
3782
3918
|
}
|
|
3783
3919
|
}
|
|
3920
|
+
|
|
3921
|
+
// Request the remote peer's replication info. This makes joins resilient to
|
|
3922
|
+
// timing-sensitive delivery/order issues where we may miss their initial
|
|
3923
|
+
// replication announcement.
|
|
3924
|
+
this.rpc
|
|
3925
|
+
.send(new RequestReplicationInfoMessage(), {
|
|
3926
|
+
mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
|
|
3927
|
+
})
|
|
3928
|
+
.catch((e) => logger.error(e.toString()));
|
|
3784
3929
|
} else {
|
|
3785
3930
|
await this.removeReplicator(publicKey);
|
|
3786
3931
|
}
|
|
@@ -3940,11 +4085,28 @@ export class SharedLog<
|
|
|
3940
4085
|
|
|
3941
4086
|
let cursor: NumberFromType<R>[] | undefined = undefined;
|
|
3942
4087
|
|
|
3943
|
-
|
|
4088
|
+
// Checked prune requests can legitimately take longer than a fixed 10s:
|
|
4089
|
+
// - The remote may not have the entry yet and will wait up to `_respondToIHaveTimeout`
|
|
4090
|
+
// - Leadership/replicator information may take up to `waitForReplicatorTimeout` to settle
|
|
4091
|
+
// If we time out too early we can end up with permanently prunable heads that never
|
|
4092
|
+
// get retried (a common CI flake in "prune before join" tests).
|
|
4093
|
+
const checkedPruneTimeoutMs =
|
|
4094
|
+
options?.timeout ??
|
|
4095
|
+
Math.max(
|
|
4096
|
+
10_000,
|
|
4097
|
+
Number(this._respondToIHaveTimeout ?? 0) +
|
|
4098
|
+
this.waitForReplicatorTimeout +
|
|
4099
|
+
PRUNE_DEBOUNCE_INTERVAL * 2,
|
|
4100
|
+
);
|
|
4101
|
+
|
|
4102
|
+
const timeout = setTimeout(() => {
|
|
3944
4103
|
reject(
|
|
3945
|
-
new Error(
|
|
4104
|
+
new Error(
|
|
4105
|
+
`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`,
|
|
4106
|
+
),
|
|
3946
4107
|
);
|
|
3947
|
-
},
|
|
4108
|
+
}, checkedPruneTimeoutMs);
|
|
4109
|
+
timeout.unref?.();
|
|
3948
4110
|
|
|
3949
4111
|
this._pendingDeletes.set(entry.hash, {
|
|
3950
4112
|
promise: deferredPromise,
|
|
@@ -4092,8 +4254,13 @@ export class SharedLog<
|
|
|
4092
4254
|
);
|
|
4093
4255
|
}
|
|
4094
4256
|
|
|
4095
|
-
async waitForPruned(
|
|
4096
|
-
|
|
4257
|
+
async waitForPruned(options?: {
|
|
4258
|
+
timeout?: number;
|
|
4259
|
+
signal?: AbortSignal;
|
|
4260
|
+
delayInterval?: number;
|
|
4261
|
+
timeoutMessage?: string;
|
|
4262
|
+
}) {
|
|
4263
|
+
await waitFor(() => this._pendingDeletes.size === 0, options);
|
|
4097
4264
|
}
|
|
4098
4265
|
|
|
4099
4266
|
async onReplicationChange(
|
|
@@ -4272,11 +4439,24 @@ export class SharedLog<
|
|
|
4272
4439
|
cpuUsage: this.cpuUsage?.value(),
|
|
4273
4440
|
});
|
|
4274
4441
|
|
|
4442
|
+
const absoluteDifference = Math.abs(dynamicRange.widthNormalized - newFactor);
|
|
4275
4443
|
const relativeDifference =
|
|
4276
|
-
|
|
4277
|
-
|
|
4444
|
+
absoluteDifference /
|
|
4445
|
+
Math.max(
|
|
4446
|
+
dynamicRange.widthNormalized,
|
|
4447
|
+
RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR,
|
|
4448
|
+
);
|
|
4449
|
+
|
|
4450
|
+
let minRelativeChange = RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE;
|
|
4451
|
+
if (this.replicationController.maxMemoryLimit != null) {
|
|
4452
|
+
minRelativeChange =
|
|
4453
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT;
|
|
4454
|
+
} else if (this.replicationController.maxCPUUsage != null) {
|
|
4455
|
+
minRelativeChange =
|
|
4456
|
+
RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT;
|
|
4457
|
+
}
|
|
4278
4458
|
|
|
4279
|
-
if (relativeDifference >
|
|
4459
|
+
if (relativeDifference > minRelativeChange) {
|
|
4280
4460
|
// TODO can not reuse old range, since it will (potentially) affect the index because of sideeffects
|
|
4281
4461
|
dynamicRange = new this.indexableDomain.constructorRange({
|
|
4282
4462
|
offset: dynamicRange.start1,
|