@peerbit/shared-log 12.2.0 → 12.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/pid-convergence.d.ts +2 -0
- package/dist/benchmark/pid-convergence.d.ts.map +1 -0
- package/dist/benchmark/pid-convergence.js +138 -0
- package/dist/benchmark/pid-convergence.js.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts +2 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.d.ts.map +1 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js +104 -0
- package/dist/benchmark/rateless-iblt-sender-startsync.js.map +1 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.d.ts +2 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.d.ts.map +1 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.js +112 -0
- package/dist/benchmark/rateless-iblt-startsync-cache.js.map +1 -0
- package/dist/benchmark/sync-catchup.d.ts +3 -0
- package/dist/benchmark/sync-catchup.d.ts.map +1 -0
- package/dist/benchmark/sync-catchup.js +109 -0
- package/dist/benchmark/sync-catchup.js.map +1 -0
- package/dist/src/index.d.ts +16 -3
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +259 -82
- package/dist/src/index.js.map +1 -1
- package/dist/src/ranges.d.ts +1 -0
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +48 -18
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/index.d.ts +14 -0
- package/dist/src/sync/index.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.d.ts +14 -22
- package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
- package/dist/src/sync/rateless-iblt.js +137 -22
- package/dist/src/sync/rateless-iblt.js.map +1 -1
- package/dist/src/sync/simple.d.ts +3 -1
- package/dist/src/sync/simple.d.ts.map +1 -1
- package/dist/src/sync/simple.js +23 -1
- package/dist/src/sync/simple.js.map +1 -1
- package/package.json +12 -12
- package/src/index.ts +333 -126
- package/src/ranges.ts +97 -65
- package/src/sync/index.ts +19 -0
- package/src/sync/rateless-iblt.ts +187 -41
- package/src/sync/simple.ts +25 -2
package/src/index.ts
CHANGED
@@ -135,7 +135,11 @@ import {
   maxReplicas,
 } from "./replication.js";
 import { Observer, Replicator } from "./role.js";
-import type {
+import type {
+  SyncOptions,
+  SynchronizerConstructor,
+  Syncronizer,
+} from "./sync/index.js";
 import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
 import { SimpleSyncronizer } from "./sync/simple.js";
 import { groupByGid } from "./utils.js";
@@ -358,9 +362,12 @@ export type SharedLogOptions<
   keep?: (
     entry: ShallowOrFullEntry<T> | EntryReplicated<R>,
   ) => Promise<boolean> | boolean;
+  sync?: SyncOptions<R>;
   syncronizer?: SynchronizerConstructor<R>;
   timeUntilRoleMaturity?: number;
   waitForReplicatorTimeout?: number;
+  waitForReplicatorRequestIntervalMs?: number;
+  waitForReplicatorRequestMaxAttempts?: number;
   waitForPruneDelay?: number;
   distributionDebounceTime?: number;
   compatibility?: number;
@@ -371,11 +378,20 @@ export type SharedLogOptions<
 export const DEFAULT_MIN_REPLICAS = 2;
 export const WAIT_FOR_REPLICATOR_TIMEOUT = 9000;
 export const WAIT_FOR_ROLE_MATURITY = 5000;
-export const
+export const WAIT_FOR_REPLICATOR_REQUEST_INTERVAL = 1000;
+export const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;
+// TODO(prune): Investigate if/when a non-zero prune delay is required for correctness
+// (e.g. responsibility/replication-info message reordering in multi-peer scenarios).
+// Prefer making pruning robust without timing-based heuristics.
+export const WAIT_FOR_PRUNE_DELAY = 0;
 const PRUNE_DEBOUNCE_INTERVAL = 500;

 // DONT SET THIS ANY LOWER, because it will make the pid controller unstable as the system responses are not fast enough to updates from the pid controller
 const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
+const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
+const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
+const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
+const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;

 const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;

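The new `SharedLogOptions` fields and constants above are wired through in the constructor and synchronizer setup further down in this diff. A minimal configuration sketch (only the option names and defaults come from this release; the surrounding wiring is illustrative):

    // Hypothetical values for the options added in 12.3.0.
    const sharedLogArgs = {
        waitForReplicatorTimeout: 9_000,           // existing option (default WAIT_FOR_REPLICATOR_TIMEOUT)
        waitForReplicatorRequestIntervalMs: 1_000, // new: re-request interval while waiting for a replicator
        waitForReplicatorRequestMaxAttempts: 5,    // new: cap on those re-requests (must be > 0)
        // new: forwarded as-is to the synchronizer constructor (see the sync/index.ts changes)
        // sync: { ... } // SyncOptions<R>
    };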
@@ -451,6 +467,7 @@ export class SharedLog<
   private recentlyRebalanced!: Cache<string>;

   uniqueReplicators!: Set<string>;
+  private _replicatorsReconciled!: boolean;

   /* private _totalParticipation!: number; */

@@ -551,6 +568,8 @@ export class SharedLog<

   timeUntilRoleMaturity!: number;
   waitForReplicatorTimeout!: number;
+  waitForReplicatorRequestIntervalMs!: number;
+  waitForReplicatorRequestMaxAttempts?: number;
   waitForPruneDelay!: number;
   distributionDebounceTime!: number;

@@ -612,15 +631,6 @@ export class SharedLog<
   ) {
     this.rebalanceParticipationDebounced = undefined;

-    // make the rebalancing to respect warmup time
-    let intervalTime = interval * 2;
-    let timeout = setTimeout(() => {
-      intervalTime = interval;
-    }, this.timeUntilRoleMaturity);
-    this._closeController.signal.addEventListener("abort", () => {
-      clearTimeout(timeout);
-    });
-
     this.rebalanceParticipationDebounced = debounceFixedInterval(
       () => this.rebalanceParticipation(),
       /* Math.max(
@@ -630,7 +640,7 @@ export class SharedLog<
       REBALANCE_DEBOUNCE_INTERVAL
       )
       ) */
-
+      interval, // TODO make this dynamic on the number of replicators
     );
   }

@@ -1161,16 +1171,31 @@ export class SharedLog<

       let prevCount = deleted.length;

-
+      const existingById = new Map(deleted.map((x) => [x.idString, x]));
+      const hasSameRanges =
+        deleted.length === ranges.length &&
+        ranges.every((range) => {
+          const existing = existingById.get(range.idString);
+          return existing != null && existing.equalRange(range);
+        });

-
-
-
-
-
-
-      })
-
+      // Avoid churn on repeated full-state announcements that don't change any
+      // replication ranges. This prevents unnecessary `replication:change`
+      // events and rebalancing cascades.
+      if (hasSameRanges) {
+        diffs = [];
+      } else {
+        await this.replicationIndex.del({ query: { hash: from.hashcode() } });
+
+        diffs = [
+          ...deleted.map((x) => {
+            return { range: x, type: "removed" as const, timestamp };
+          }),
+          ...ranges.map((x) => {
+            return { range: x, type: "added" as const, timestamp };
+          }),
+        ];
+      }

       isNewReplicator = prevCount === 0 && ranges.length > 0;
     } else {
@@ -1193,16 +1218,20 @@ export class SharedLog<
       }
     }

+    let prevCountForOwner: number | undefined = undefined;
     if (existing.length === 0) {
-
+      prevCountForOwner = await this.replicationIndex.count({
         query: new StringMatch({ key: "hash", value: from.hashcode() }),
       });
-      isNewReplicator =
+      isNewReplicator = prevCountForOwner === 0;
     } else {
       isNewReplicator = false;
     }

-    if (
+    if (
+      checkDuplicates &&
+      (existing.length > 0 || (prevCountForOwner ?? 0) > 0)
+    ) {
       let deduplicated: ReplicationRangeIndexable<any>[] = [];

       // TODO also deduplicate/de-overlap among the ranges that ought to be inserted?
@@ -1869,6 +1898,7 @@ export class SharedLog<
     this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });

     this.uniqueReplicators = new Set();
+    this._replicatorsReconciled = false;

     this.openTime = +new Date();
     this.oldestOpenTime = this.openTime;
@@ -1878,13 +1908,32 @@ export class SharedLog<
     this.timeUntilRoleMaturity =
       options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
     this.waitForReplicatorTimeout =
-      options?.waitForReplicatorTimeout
-    this.
+      options?.waitForReplicatorTimeout ?? WAIT_FOR_REPLICATOR_TIMEOUT;
+    this.waitForReplicatorRequestIntervalMs =
+      options?.waitForReplicatorRequestIntervalMs ??
+      WAIT_FOR_REPLICATOR_REQUEST_INTERVAL;
+    this.waitForReplicatorRequestMaxAttempts =
+      options?.waitForReplicatorRequestMaxAttempts;
+    this.waitForPruneDelay = options?.waitForPruneDelay ?? WAIT_FOR_PRUNE_DELAY;

     if (this.waitForReplicatorTimeout < this.timeUntilRoleMaturity) {
       this.waitForReplicatorTimeout = this.timeUntilRoleMaturity; // does not makes sense to expect a replicator to mature faster than it is reachable
     }

+    if (this.waitForReplicatorRequestIntervalMs <= 0) {
+      throw new Error(
+        "waitForReplicatorRequestIntervalMs must be a positive number",
+      );
+    }
+    if (
+      this.waitForReplicatorRequestMaxAttempts != null &&
+      this.waitForReplicatorRequestMaxAttempts <= 0
+    ) {
+      throw new Error(
+        "waitForReplicatorRequestMaxAttempts must be a positive number",
+      );
+    }
+
     this._closeController = new AbortController();
     this._isTrustedReplicator = options?.canReplicate;
     this.keep = options?.keep;
@@ -2043,6 +2092,7 @@ export class SharedLog<
         rangeIndex: this._replicationRangeIndex,
         rpc: this.rpc,
         coordinateToHash: this.coordinateToHash,
+        sync: options?.sync,
       });
     } else {
       if (
@@ -2054,6 +2104,7 @@ export class SharedLog<
         rpc: this.rpc,
         entryIndex: this.entryCoordinatesIndex,
         coordinateToHash: this.coordinateToHash,
+        sync: options?.sync,
       });
     } else {
       if (this.domain.resolution === "u32") {
@@ -2069,6 +2120,7 @@ export class SharedLog<
         rangeIndex: this._replicationRangeIndex,
         rpc: this.rpc,
         coordinateToHash: this.coordinateToHash,
+        sync: options?.sync,
       }) as Syncronizer<R>;
     }
   }
@@ -2168,7 +2220,16 @@ export class SharedLog<
     await super.afterOpen();

     // We do this here, because these calls requires this.closed == false
-    this.pruneOfflineReplicators()
+    void this.pruneOfflineReplicators()
+      .then(() => {
+        this._replicatorsReconciled = true;
+      })
+      .catch((error) => {
+        if (isNotStartedError(error as Error)) {
+          return;
+        }
+        logger.error(error);
+      });

     await this.rebalanceParticipation();

@@ -2384,15 +2445,35 @@ export class SharedLog<
         set.add(key);
       }

-
-
-
-
-
+      if (options?.reachableOnly) {
+        // Prefer the live pubsub subscriber set when filtering reachability.
+        // `uniqueReplicators` is primarily driven by replication messages and can lag during
+        // joins/restarts; using subscribers prevents excluding peers that are reachable but
+        // whose replication ranges were loaded from disk or haven't been processed yet.
+        const subscribers =
+          (await this.node.services.pubsub.getSubscribers(this.topic)) ??
+          undefined;
+        const subscriberHashcodes = subscribers
+          ? new Set(subscribers.map((key) => key.hashcode()))
+          : undefined;
+
+        const reachable: string[] = [];
+        const selfHash = this.node.identity.publicKey.hashcode();
+        for (const peer of set) {
+          if (peer === selfHash) {
+            reachable.push(peer);
+            continue;
+          }
+          if (
+            subscriberHashcodes
+              ? subscriberHashcodes.has(peer)
+              : this.uniqueReplicators.has(peer)
+          ) {
+            reachable.push(peer);
+          }
         }
+        return reachable;
       }
-      return reachableSet;
-      }

       return [...set];
     } catch (error) {
@@ -2836,22 +2917,20 @@ export class SharedLog<
         context.from!.hashcode(),
       );
     } else if (msg instanceof RequestReplicationInfoMessage) {
-      // TODO this message type is never used, should we remove it?
-
       if (context.from.equals(this.node.identity.publicKey)) {
         return;
       }
-
-
-
-          x.toReplicationRange(),
-        ),
-      }),
-      {
-        mode: new SilentDelivery({ to: [context.from], redundancy: 1 }),
-      },
+
+      const segments = (await this.getMyReplicationSegments()).map((x) =>
+        x.toReplicationRange(),
       );

+      this.rpc
+        .send(new AllReplicatingSegmentsMessage({ segments }), {
+          mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
+        })
+        .catch((e) => logger.error(e.toString()));
+
       // for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
       if (this.v8Behaviour) {
         const role = this.getRole();
@@ -2873,73 +2952,60 @@ export class SharedLog<
         }
       }
     } else if (
-
-
-
-
-
-
-
-      let replicationInfoMessage = msg as
-        | AllReplicatingSegmentsMessage
-        | AddedReplicationSegmentMessage;
-
-      // we have this statement because peers might have changed/announced their role,
-      // but we don't know them as "subscribers" yet. i.e. they are not online
-
-      this.waitFor(context.from, {
-        signal: this._closeController.signal,
-        timeout: this.waitForReplicatorTimeout,
-      })
-        .then(async () => {
-          // do use an operation log here, because we want to make sure that we don't miss any updates
-          // and do them in the right order
-          const prev = this.latestReplicationInfoMessage.get(
-            context.from!.hashcode(),
-          );
+      msg instanceof AllReplicatingSegmentsMessage ||
+      msg instanceof AddedReplicationSegmentMessage
+    ) {
+      if (context.from.equals(this.node.identity.publicKey)) {
+        return;
+      }

-
+      const replicationInfoMessage = msg as
+        | AllReplicatingSegmentsMessage
+        | AddedReplicationSegmentMessage;
+
+      // Process replication updates even if the sender isn't yet considered "ready" by
+      // `Program.waitFor()`. Dropping these messages can lead to missing replicator info
+      // (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
+      const from = context.from!;
+      const messageTimestamp = context.message.header.timestamp;
+      (async () => {
+        const prev = this.latestReplicationInfoMessage.get(from.hashcode());
+        if (prev && prev > messageTimestamp) {
          return;
        }

-          this.latestReplicationInfoMessage.set(
-            context.from!.hashcode(),
-            context.message.header.timestamp,
-          );
-
-          let reset = msg instanceof AllReplicatingSegmentsMessage;
+        this.latestReplicationInfoMessage.set(from.hashcode(), messageTimestamp);

        if (this.closed) {
          return;
        }

+        const reset = msg instanceof AllReplicatingSegmentsMessage;
        await this.addReplicationRange(
          replicationInfoMessage.segments.map((x) =>
-            x.toReplicationRangeIndexable(
+            x.toReplicationRangeIndexable(from),
          ),
-
+          from,
          {
            reset,
            checkDuplicates: true,
-            timestamp: Number(
+            timestamp: Number(messageTimestamp),
          },
        );
-
-          /* await this._modifyReplicators(msg.role, context.from!); */
-        })
-        .catch((e) => {
+      })().catch((e) => {
        if (isNotStartedError(e)) {
          return;
        }
        logger.error(
-
-          e?.message
+          `Failed to apply replication settings from '${from.hashcode()}': ${
+            e?.message ?? e
+          }`,
        );
      });
-
-
-
-
+    } else if (msg instanceof StoppedReplicating) {
+      if (context.from.equals(this.node.identity.publicKey)) {
+        return;
+      }

      const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(
        msg.segmentIds,
@@ -3132,25 +3198,29 @@ export class SharedLog<
     },
   ): Promise<void> {
     let entriesToReplicate: Entry<T>[] = [];
-    if (options?.replicate) {
+    if (options?.replicate && this.log.length > 0) {
       // TODO this block should perhaps be called from a callback on the this.log.join method on all the ignored element because already joined, like "onAlreadyJoined"

       // check which entrise we already have but not are replicating, and replicate them
       // we can not just do the 'join' call because it will ignore the already joined entries
       for (const element of entries) {
         if (typeof element === "string") {
-
-
-
+          if (await this.log.has(element)) {
+            const entry = await this.log.get(element);
+            if (entry) {
+              entriesToReplicate.push(entry);
+            }
           }
         } else if (element instanceof Entry) {
           if (await this.log.has(element.hash)) {
             entriesToReplicate.push(element);
           }
         } else {
-
-
-
+          if (await this.log.has(element.hash)) {
+            const entry = await this.log.get(element.hash);
+            if (entry) {
+              entriesToReplicate.push(entry);
+            }
           }
         }
       }
@@ -3269,6 +3339,7 @@ export class SharedLog<

     let settled = false;
     let timer: ReturnType<typeof setTimeout> | undefined;
+    let requestTimer: ReturnType<typeof setTimeout> | undefined;

     const clear = () => {
       this.events.removeEventListener("replicator:mature", check);
@@ -3278,6 +3349,10 @@ export class SharedLog<
       clearTimeout(timer);
       timer = undefined;
     }
+    if (requestTimer != null) {
+      clearTimeout(requestTimer);
+      requestTimer = undefined;
+    }
   };

   const resolve = () => {
@@ -3309,6 +3384,42 @@ export class SharedLog<
       );
     }, timeoutMs);

+    let requestAttempts = 0;
+    const requestIntervalMs = this.waitForReplicatorRequestIntervalMs;
+    const maxRequestAttempts =
+      this.waitForReplicatorRequestMaxAttempts ??
+      Math.max(
+        WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS,
+        Math.ceil(timeoutMs / requestIntervalMs),
+      );
+
+    const requestReplicationInfo = () => {
+      if (settled || this.closed) {
+        return;
+      }
+
+      if (requestAttempts >= maxRequestAttempts) {
+        return;
+      }
+
+      requestAttempts++;
+
+      this.rpc
+        .send(new RequestReplicationInfoMessage(), {
+          mode: new SeekDelivery({ redundancy: 1, to: [key] }),
+        })
+        .catch((e) => {
+          // Best-effort: missing peers / unopened RPC should not fail the wait logic.
+          if (isNotStartedError(e as Error)) {
+            return;
+          }
+        });
+
+      if (requestAttempts < maxRequestAttempts) {
+        requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
+      }
+    };
+
     const check = async () => {
       const iterator = this.replicationIndex?.iterate(
         { query: new StringMatch({ key: "hash", value: key.hashcode() }) },
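For orientation, the attempt budget above resolves as follows with the defaults introduced in this release (the helper is illustrative, not part of the package):

    // Default retry budget for the replication-info re-requests (constants from this diff).
    const WAIT_FOR_REPLICATOR_REQUEST_INTERVAL = 1000; // ms
    const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;

    const attemptsFor = (timeoutMs: number, intervalMs = WAIT_FOR_REPLICATOR_REQUEST_INTERVAL) =>
        Math.max(WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS, Math.ceil(timeoutMs / intervalMs));

    attemptsFor(9000); // 9 attempts with the default WAIT_FOR_REPLICATOR_TIMEOUT of 9000 ms
    attemptsFor(2500); // 3 attempts: short timeouts still get the minimum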
@@ -3333,6 +3444,7 @@ export class SharedLog<
       }
     };

+    requestReplicationInfo();
     check();
     this.events.addEventListener("replicator:mature", check);
     this.events.addEventListener("replication:change", check);
@@ -3582,27 +3694,54 @@ export class SharedLog<
       return 0;
     }

-
-
-
-
-      ?.length ?? 1);
-    const diffToOldest =
-      subscribers > 1 ? now - this.oldestOpenTime - 1 : Number.MAX_SAFE_INTEGER;
-
-    const result = Math.min(
-      this.timeUntilRoleMaturity,
-      Math.max(diffToOldest, this.timeUntilRoleMaturity),
-      Math.max(
-        Math.round(
-          (this.timeUntilRoleMaturity * Math.log(subscribers + 1)) / 3,
-        ),
-        this.timeUntilRoleMaturity,
-      ),
-    ); // / 3 so that if 2 replicators and timeUntilRoleMaturity = 1e4 the result will be 1
+    // Explicitly disable maturity gating (used by many tests).
+    if (this.timeUntilRoleMaturity <= 0) {
+      return 0;
+    }

-
-
+    // If we're alone (or pubsub isn't ready), a fixed maturity time is sufficient.
+    // When there are multiple replicators we want a stable threshold that doesn't
+    // depend on "now" (otherwise it can drift and turn into a flake).
+    let subscribers = 1;
+    if (!this.rpc.closed) {
+      try {
+        subscribers =
+          (await this.node.services.pubsub.getSubscribers(this.rpc.topic))
+            ?.length ?? 1;
+      } catch {
+        // Best-effort only; fall back to 1.
+      }
+    }
+
+    if (subscribers <= 1) {
+      return this.timeUntilRoleMaturity;
+    }
+
+    // Use replication range timestamps to compute a stable "age gap" between the
+    // newest and oldest known roles. This keeps the oldest role mature while
+    // preventing newer roles from being treated as mature purely because time
+    // passes between test steps / network events.
+    let newestOpenTime = this.openTime;
+    try {
+      const newestIterator = await this.replicationIndex.iterate(
+        {
+          sort: [new Sort({ key: "timestamp", direction: "desc" })],
+        },
+        { shape: { timestamp: true }, reference: true },
+      );
+      const newestTimestampFromDB = (await newestIterator.next(1))[0]?.value
+        .timestamp;
+      await newestIterator.close();
+      if (newestTimestampFromDB != null) {
+        newestOpenTime = Number(newestTimestampFromDB);
+      }
+    } catch {
+      // Best-effort only; fall back to local open time.
+    }
+
+    const ageGapToOldest = newestOpenTime - this.oldestOpenTime;
+    const roleAge = Math.max(this.timeUntilRoleMaturity, ageGapToOldest);
+    return roleAge < 0 ? 0 : roleAge;
   }

   async findLeaders(
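The net effect of the rewritten getDefaultMinRoleAge() is easier to see with concrete numbers (the timestamps are made up; only the formula and the WAIT_FOR_ROLE_MATURITY default come from this diff):

    // With two or more subscribers, the threshold is the larger of the configured
    // maturity time and the age gap between the newest and oldest known replication ranges.
    const timeUntilRoleMaturity = 5_000;  // default WAIT_FOR_ROLE_MATURITY
    const oldestOpenTime = 1_000_000_000;
    const newestOpenTime = 1_000_012_000; // newest range announced 12 s later

    const ageGapToOldest = newestOpenTime - oldestOpenTime;          // 12_000
    const roleAge = Math.max(timeUntilRoleMaturity, ageGapToOldest); // 12_000 ms
    // The 12 s old role counts as mature, the freshly announced one does not, and the
    // result no longer drifts with wall-clock time between calls.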
@@ -3681,13 +3820,37 @@ export class SharedLog<
     },
   ): Promise<Map<string, { intersecting: boolean }>> {
     const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
+    const selfHash = this.node.identity.publicKey.hashcode();
+
+    // Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
+    // persisted replication index. Until then, fall back to live pubsub subscribers
+    // and avoid relying on `uniqueReplicators` being complete.
+    let peerFilter: Set<string> | undefined = undefined;
+    if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
+      peerFilter = this.uniqueReplicators.has(selfHash)
+        ? this.uniqueReplicators
+        : new Set([...this.uniqueReplicators, selfHash]);
+    } else {
+      try {
+        const subscribers =
+          (await this.node.services.pubsub.getSubscribers(this.topic)) ??
+          undefined;
+        if (subscribers && subscribers.length > 0) {
+          peerFilter = new Set(subscribers.map((key) => key.hashcode()));
+          peerFilter.add(selfHash);
+        }
+      } catch {
+        // Best-effort only; if pubsub isn't ready, do a full scan.
+      }
+    }
     return getSamples<R>(
       cursors,
       this.replicationIndex,
       roleAge,
       this.indexableDomain.numbers,
       {
-
+        peerFilter,
+        uniqueReplicators: peerFilter,
       },
     );
   }
@@ -3742,7 +3905,7 @@ export class SharedLog<
     for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
       v.delete(publicKey.hashcode());
       if (v.size === 0) {
-        this.
+        this._requestIPruneResponseReplicatorSet.delete(k);
       }
     }

@@ -3781,6 +3944,15 @@ export class SharedLog<
           .catch((e) => logger.error(e.toString()));
       }
     }
+
+    // Request the remote peer's replication info. This makes joins resilient to
+    // timing-sensitive delivery/order issues where we may miss their initial
+    // replication announcement.
+    this.rpc
+      .send(new RequestReplicationInfoMessage(), {
+        mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
+      })
+      .catch((e) => logger.error(e.toString()));
     } else {
       await this.removeReplicator(publicKey);
     }
@@ -3940,11 +4112,28 @@ export class SharedLog<

     let cursor: NumberFromType<R>[] | undefined = undefined;

-
+    // Checked prune requests can legitimately take longer than a fixed 10s:
+    // - The remote may not have the entry yet and will wait up to `_respondToIHaveTimeout`
+    // - Leadership/replicator information may take up to `waitForReplicatorTimeout` to settle
+    // If we time out too early we can end up with permanently prunable heads that never
+    // get retried (a common CI flake in "prune before join" tests).
+    const checkedPruneTimeoutMs =
+      options?.timeout ??
+      Math.max(
+        10_000,
+        Number(this._respondToIHaveTimeout ?? 0) +
+          this.waitForReplicatorTimeout +
+          PRUNE_DEBOUNCE_INTERVAL * 2,
+      );
+
+    const timeout = setTimeout(() => {
       reject(
-        new Error(
+        new Error(
+          `Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`,
+        ),
       );
-    },
+    }, checkedPruneTimeoutMs);
+    timeout.unref?.();

     this._pendingDeletes.set(entry.hash, {
       promise: deferredPromise,
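With the defaults visible in this diff (WAIT_FOR_REPLICATOR_TIMEOUT = 9000, PRUNE_DEBOUNCE_INTERVAL = 500) and an assumed _respondToIHaveTimeout of 10 s (that default is not shown here), the fallback budget works out as below:

    // Illustrative arithmetic only; the 10_000 ms _respondToIHaveTimeout is an assumption.
    const checkedPruneTimeoutMs = Math.max(
        10_000,
        10_000 /* _respondToIHaveTimeout */ + 9_000 /* waitForReplicatorTimeout */ + 500 * 2,
    ); // 20_000 ms instead of the previous fixed 10_000 ms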
@@ -4092,8 +4281,13 @@ export class SharedLog<
     );
   }

-  async waitForPruned(
-
+  async waitForPruned(options?: {
+    timeout?: number;
+    signal?: AbortSignal;
+    delayInterval?: number;
+    timeoutMessage?: string;
+  }) {
+    await waitFor(() => this._pendingDeletes.size === 0, options);
   }

   async onReplicationChange(
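Hypothetical call sites for the widened waitForPruned() signature above (the log variable stands in for an opened SharedLog instance):

    // waitForPruned() still works without arguments...
    await log.waitForPruned();

    // ...and now also accepts the waitFor()-style options shown in the hunk above.
    const controller = new AbortController();
    await log.waitForPruned({
        timeout: 30_000,           // give pending checked prunes up to 30 s to drain
        signal: controller.signal, // or cancel the wait externally
        timeoutMessage: "pending prunes did not settle",
    });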
@@ -4272,11 +4466,24 @@ export class SharedLog<
       cpuUsage: this.cpuUsage?.value(),
     });

+    const absoluteDifference = Math.abs(dynamicRange.widthNormalized - newFactor);
     const relativeDifference =
-
-
+      absoluteDifference /
+      Math.max(
+        dynamicRange.widthNormalized,
+        RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR,
+      );
+
+    let minRelativeChange = RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE;
+    if (this.replicationController.maxMemoryLimit != null) {
+      minRelativeChange =
+        RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT;
+    } else if (this.replicationController.maxCPUUsage != null) {
+      minRelativeChange =
+        RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT;
+    }

-    if (relativeDifference >
+    if (relativeDifference > minRelativeChange) {
       // TODO can not reuse old range, since it will (potentially) affect the index because of sideeffects
       dynamicRange = new this.indexableDomain.constructorRange({
         offset: dynamicRange.start1,