@peerbit/shared-log 13.1.0 → 13.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/join-backfill-repair.d.ts +2 -0
- package/dist/benchmark/join-backfill-repair.d.ts.map +1 -0
- package/dist/benchmark/join-backfill-repair.js +288 -0
- package/dist/benchmark/join-backfill-repair.js.map +1 -0
- package/dist/src/index.d.ts +38 -2
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +989 -119
- package/dist/src/index.js.map +1 -1
- package/dist/src/pid.d.ts.map +1 -1
- package/dist/src/pid.js +16 -2
- package/dist/src/pid.js.map +1 -1
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +8 -1
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/simple.d.ts +7 -0
- package/dist/src/sync/simple.d.ts.map +1 -1
- package/dist/src/sync/simple.js +71 -22
- package/dist/src/sync/simple.js.map +1 -1
- package/package.json +8 -7
- package/src/index.ts +1347 -230
- package/src/pid.ts +22 -2
- package/src/ranges.ts +9 -1
- package/src/sync/simple.ts +56 -23
package/dist/src/index.js
CHANGED
|
@@ -66,7 +66,7 @@ import {} from "./replication-domain.js";
|
|
|
66
66
|
import { AbsoluteReplicas, AddedReplicationSegmentMessage, AllReplicatingSegmentsMessage, MinReplicas, ReplicationPingMessage, ReplicationError, RequestReplicationInfoMessage, ResponseRoleMessage, StoppedReplicating, decodeReplicas, encodeReplicas, maxReplicas, } from "./replication.js";
|
|
67
67
|
import { Observer, Replicator } from "./role.js";
|
|
68
68
|
import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
|
|
69
|
-
import { SimpleSyncronizer } from "./sync/simple.js";
|
|
69
|
+
import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
|
|
70
70
|
import { groupByGid } from "./utils.js";
|
|
71
71
|
const toLocalPublicSignKey = (key) => {
|
|
72
72
|
if (typeof key === "string") {
|
|
@@ -263,10 +263,95 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
|
|
|
263
263
|
// Churn/join repair can race with pruning and transient missed sync requests under
|
|
264
264
|
// heavy event-loop load. Keep retries alive with a longer tail so reassigned
|
|
265
265
|
// entries are retried after short bursts and slower recovery windows.
|
|
266
|
-
const
|
|
266
|
+
const CHURN_REPAIR_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000];
// Retry schedules are lists of offsets (ms) from the initial dispatch; the
// first entry is the immediate attempt.
const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000];
// Intentionally a separate array instance from the warmup schedule.
const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000];
const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [
    0,
    1_000,
    3_000,
    7_000,
];
// Delay of the first join-authoritative repair sweep; later sweeps follow at
// the additional delays listed below.
const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [JOIN_AUTHORITATIVE_REPAIR_DELAY_MS, 7_000, 15_000, 30_000];
// Delay before queued append backfill entries are flushed as one batch.
const APPEND_BACKFILL_DELAY_MS = 500;
const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;
// Upper bound on the number of hashes carried by one confirmation message.
const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
// Every repair dispatch mode; per-mode maps and metrics are keyed by these.
const REPAIR_DISPATCH_MODES = ["join-warmup", "join-authoritative", "append-backfill", "churn"];
|
|
304
|
+
// Returns a fresh, zeroed counter bucket for one repair dispatch mode.
const createRepairMetricBucket = () => {
    return {
        dispatches: 0,
        entries: 0,
        ratelessFirstPasses: 0,
        simpleFallbackPasses: 0,
    };
};
// Returns the metrics table: one independent bucket per repair dispatch mode.
const createRepairMetrics = () => {
    const metrics = {};
    metrics["join-warmup"] = createRepairMetricBucket();
    metrics["join-authoritative"] = createRepairMetricBucket();
    metrics["append-backfill"] = createRepairMetricBucket();
    metrics.churn = createRepairMetricBucket();
    return metrics;
};
|
|
316
|
+
// Map of mode -> empty Set of peers waiting for a repair sweep.
const createRepairPendingPeersByMode = () => {
    const byMode = new Map();
    for (const mode of REPAIR_DISPATCH_MODES) {
        byMode.set(mode, new Set());
    }
    return byMode;
};
// Copies a pending-peers map; every mode gets its own new Set so the copy can
// be read while the source sets are cleared.
const cloneRepairPendingPeersByMode = (pending) => {
    const copy = new Map();
    for (const mode of REPAIR_DISPATCH_MODES) {
        copy.set(mode, new Set(pending.get(mode) ?? []));
    }
    return copy;
};
// Map of mode -> empty Map (filled per target by the frontier queue).
const createRepairFrontierByMode = () => {
    const byMode = new Map();
    for (const mode of REPAIR_DISPATCH_MODES) {
        byMode.set(mode, new Map());
    }
    return byMode;
};
// Map of mode -> empty Set of targets that currently have a frontier runner.
const createRepairActiveTargetsByMode = () => {
    const byMode = new Map();
    for (const mode of REPAIR_DISPATCH_MODES) {
        byMode.set(mode, new Set());
    }
    return byMode;
};
|
|
320
|
+
// Looks up the built-in retry schedule for a repair mode. Yields undefined for
// a mode that is not one of the four known dispatch modes.
const getRepairRetrySchedule = (mode) => {
    if (mode === "join-warmup") {
        return JOIN_WARMUP_RETRY_SCHEDULE_MS;
    }
    if (mode === "join-authoritative") {
        return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
    }
    if (mode === "append-backfill") {
        return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
    }
    if (mode === "churn") {
        return CHURN_REPAIR_RETRY_SCHEDULE_MS;
    }
    return undefined;
};
// Picks the schedule to use for a dispatch: the caller's override when one is
// given, otherwise the built-in schedule for the mode.
const resolveRepairRetrySchedule = (mode, override, trackedFrontier = false) => {
    const defaultSchedule = getRepairRetrySchedule(mode);
    const hasOverride = Boolean(override) && override.length !== 0;
    if (!hasOverride) {
        return defaultSchedule;
    }
    const isImmediateOnly = override.length === 1 && override[0] === 0;
    if (trackedFrontier && isImmediateOnly && defaultSchedule.length > 1) {
        // A tracked frontier with only an immediate retry would otherwise stay on
        // attempt 0 forever, which means rateless-only retries and no sparse-tail
        // simple fallback. Keep the immediate seed, then continue with the normal
        // tracked repair schedule.
        const tail = defaultSchedule.slice(1);
        return [0, ...tail];
    }
    return override;
};
|
|
349
|
+
// Chooses the transport for a repair attempt: only the first attempt of a
// non-churn mode uses "rateless"; churn and all later attempts use "simple".
const getRepairTransportForAttempt = (mode, attemptIndex) => {
    const isFirstNonChurnAttempt = mode !== "churn" && attemptIndex === 0;
    if (isFirstNonChurnAttempt) {
        return "rateless";
    }
    return "simple";
};
|
|
270
355
|
const toPositiveInteger = (value, fallback, label) => {
|
|
271
356
|
if (value == null) {
|
|
272
357
|
return fallback;
|
|
@@ -375,8 +460,18 @@ let SharedLog = (() => {
|
|
|
375
460
|
_repairRetryTimers;
|
|
376
461
|
_recentRepairDispatch;
|
|
377
462
|
_repairSweepRunning;
|
|
378
|
-
|
|
379
|
-
|
|
463
|
+
_repairSweepPendingModes;
|
|
464
|
+
_repairSweepPendingPeersByMode;
|
|
465
|
+
_repairFrontierByMode;
|
|
466
|
+
_repairFrontierActiveTargetsByMode;
|
|
467
|
+
_repairSweepOptimisticGidPeersPending;
|
|
468
|
+
_entryKnownPeers;
|
|
469
|
+
_joinAuthoritativeRepairTimersByDelay;
|
|
470
|
+
_joinAuthoritativeRepairPeersByDelay;
|
|
471
|
+
_assumeSyncedRepairSuppressedUntil;
|
|
472
|
+
_appendBackfillTimer;
|
|
473
|
+
_appendBackfillPendingByTarget;
|
|
474
|
+
_repairMetrics;
|
|
380
475
|
_topicSubscribersCache;
|
|
381
476
|
// regular distribution checks
|
|
382
477
|
distributeQueue;
|
|
@@ -716,7 +811,7 @@ let SharedLog = (() => {
|
|
|
716
811
|
}),
|
|
717
812
|
});
|
|
718
813
|
}
|
|
719
|
-
async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
|
|
814
|
+
async _appendDeliverToReplicators(entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
|
|
720
815
|
const { delivery, reliability, requireRecipients, minAcks, wrap } = this._parseDeliveryOptions(deliveryArg);
|
|
721
816
|
const pending = [];
|
|
722
817
|
const track = (promise) => {
|
|
@@ -725,10 +820,32 @@ let SharedLog = (() => {
|
|
|
725
820
|
const fanoutUnicastOptions = delivery?.timeout != null || delivery?.signal != null
|
|
726
821
|
? { timeoutMs: delivery.timeout, signal: delivery.signal }
|
|
727
822
|
: undefined;
|
|
823
|
+
const fullReplicaDeliveryCandidates = await this.getFullReplicaRepairCandidates(undefined, {
|
|
824
|
+
includeSubscribers: false,
|
|
825
|
+
});
|
|
826
|
+
if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
|
|
827
|
+
for (const peer of fullReplicaDeliveryCandidates) {
|
|
828
|
+
if (!leaders.has(peer)) {
|
|
829
|
+
leaders.set(peer, { intersecting: true });
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
|
|
834
|
+
entry,
|
|
835
|
+
coordinates,
|
|
836
|
+
leaders: leaders,
|
|
837
|
+
replicas: minReplicasValue,
|
|
838
|
+
});
|
|
728
839
|
for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
|
|
729
840
|
await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
|
|
730
|
-
const
|
|
731
|
-
const
|
|
841
|
+
const authoritativeRecipients = new Set(leaders.keys());
|
|
842
|
+
const leadersForDelivery = delivery
|
|
843
|
+
? new Set(authoritativeRecipients)
|
|
844
|
+
: undefined;
|
|
845
|
+
// Outbound append delivery only tells us who we intend to send to, not who has
|
|
846
|
+
// actually stored the entry. Keep this recipient set local so later repair
|
|
847
|
+
// sweeps can still backfill peers that missed the initial delivery.
|
|
848
|
+
const set = new Set(leaders.keys());
|
|
732
849
|
let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
733
850
|
const allowSubscriberFallback = this.syncronizer instanceof SimpleSyncronizer ||
|
|
734
851
|
(this.compatibility ?? Number.MAX_VALUE) < 10;
|
|
@@ -758,6 +875,17 @@ let SharedLog = (() => {
|
|
|
758
875
|
continue;
|
|
759
876
|
}
|
|
760
877
|
if (!delivery) {
|
|
878
|
+
for (const peer of authoritativeRecipients) {
|
|
879
|
+
if (peer === selfHash) {
|
|
880
|
+
continue;
|
|
881
|
+
}
|
|
882
|
+
// Default live append delivery is still optimistic. If one remote misses
|
|
883
|
+
// the initial heads exchange and the caller did not opt into explicit
|
|
884
|
+
// delivery acks, we still need a targeted backfill source of truth for the
|
|
885
|
+
// authoritative recipients or one entry can get stuck at 2/3 replicas
|
|
886
|
+
// forever. Best-effort fallback subscribers are not repair-worthy.
|
|
887
|
+
this.queueAppendBackfill(peer, entryReplicatedForRepair);
|
|
888
|
+
}
|
|
761
889
|
this.rpc
|
|
762
890
|
.send(message, {
|
|
763
891
|
mode: isLeader
|
|
@@ -785,12 +913,16 @@ let SharedLog = (() => {
|
|
|
785
913
|
}
|
|
786
914
|
const ackTo = [];
|
|
787
915
|
let silentTo;
|
|
916
|
+
const repairTargets = new Set();
|
|
788
917
|
// Default delivery semantics: require enough remote ACKs to reach the requested
|
|
789
918
|
// replication degree (local append counts as 1).
|
|
790
919
|
const defaultMinAcks = Math.max(0, minReplicasValue - 1);
|
|
791
920
|
const ackLimitRaw = reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
|
|
792
921
|
const ackLimit = Math.max(0, Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length));
|
|
793
922
|
for (const peer of orderedRemoteRecipients) {
|
|
923
|
+
if (authoritativeRecipients.has(peer)) {
|
|
924
|
+
repairTargets.add(peer);
|
|
925
|
+
}
|
|
794
926
|
if (ackTo.length < ackLimit) {
|
|
795
927
|
ackTo.push(peer);
|
|
796
928
|
}
|
|
@@ -825,6 +957,12 @@ let SharedLog = (() => {
|
|
|
825
957
|
})
|
|
826
958
|
.catch((error) => logger.error(error));
|
|
827
959
|
}
|
|
960
|
+
for (const peer of repairTargets) {
|
|
961
|
+
// Direct append delivery is intentionally optimistic. Queue one delayed,
|
|
962
|
+
// batched maybe-sync pass for the intended recipients so stable 3-peer
|
|
963
|
+
// append workloads do not depend on perfect first-try delivery ordering.
|
|
964
|
+
this.queueAppendBackfill(peer, entryReplicatedForRepair);
|
|
965
|
+
}
|
|
828
966
|
}
|
|
829
967
|
if (pending.length > 0) {
|
|
830
968
|
await Promise.all(pending);
|
|
@@ -1376,6 +1514,7 @@ let SharedLog = (() => {
|
|
|
1376
1514
|
// Keep local sync/prune state consistent even when a peer disappears
|
|
1377
1515
|
// through replication-info updates without a topic unsubscribe event.
|
|
1378
1516
|
this.removePeerFromGidPeerHistory(keyHash);
|
|
1517
|
+
this.removeRepairFrontierTarget(keyHash);
|
|
1379
1518
|
this._recentRepairDispatch.delete(keyHash);
|
|
1380
1519
|
if (!isMe) {
|
|
1381
1520
|
this.syncronizer.onPeerDisconnected(keyHash);
|
|
@@ -1726,6 +1865,7 @@ let SharedLog = (() => {
|
|
|
1726
1865
|
for (const key of this._gidPeersHistory.keys()) {
|
|
1727
1866
|
this.removePeerFromGidPeerHistory(publicKeyHash, key);
|
|
1728
1867
|
}
|
|
1868
|
+
this.removePeerFromEntryKnownPeers(publicKeyHash);
|
|
1729
1869
|
}
|
|
1730
1870
|
}
|
|
1731
1871
|
addPeersToGidPeerHistory(gid, publicKeys, reset) {
|
|
@@ -1744,10 +1884,343 @@ let SharedLog = (() => {
|
|
|
1744
1884
|
}
|
|
1745
1885
|
return set;
|
|
1746
1886
|
}
|
|
1887
|
+
markEntriesKnownByPeer(hashes, peer) {
|
|
1888
|
+
for (const hash of hashes) {
|
|
1889
|
+
let peers = this._entryKnownPeers.get(hash);
|
|
1890
|
+
if (!peers) {
|
|
1891
|
+
peers = new Set();
|
|
1892
|
+
this._entryKnownPeers.set(hash, peers);
|
|
1893
|
+
}
|
|
1894
|
+
peers.add(peer);
|
|
1895
|
+
}
|
|
1896
|
+
}
|
|
1897
|
+
removeEntriesKnownByPeer(hashes, peer) {
|
|
1898
|
+
for (const hash of hashes) {
|
|
1899
|
+
const peers = this._entryKnownPeers.get(hash);
|
|
1900
|
+
if (!peers) {
|
|
1901
|
+
continue;
|
|
1902
|
+
}
|
|
1903
|
+
peers.delete(peer);
|
|
1904
|
+
if (peers.size === 0) {
|
|
1905
|
+
this._entryKnownPeers.delete(hash);
|
|
1906
|
+
}
|
|
1907
|
+
}
|
|
1908
|
+
}
|
|
1909
|
+
removePeerFromEntryKnownPeers(peer) {
|
|
1910
|
+
for (const [hash, peers] of this._entryKnownPeers) {
|
|
1911
|
+
peers.delete(peer);
|
|
1912
|
+
if (peers.size === 0) {
|
|
1913
|
+
this._entryKnownPeers.delete(hash);
|
|
1914
|
+
}
|
|
1915
|
+
}
|
|
1916
|
+
}
|
|
1917
|
+
isEntryKnownByPeer(hash, peer) {
|
|
1918
|
+
return this._entryKnownPeers.get(hash)?.has(peer) === true;
|
|
1919
|
+
}
|
|
1920
|
+
markRepairSweepOptimisticPeer(gid, peer) {
|
|
1921
|
+
let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
|
|
1922
|
+
if (!peers) {
|
|
1923
|
+
peers = new Map();
|
|
1924
|
+
this._repairSweepOptimisticGidPeersPending.set(gid, peers);
|
|
1925
|
+
}
|
|
1926
|
+
peers.set(peer, (peers.get(peer) || 0) + 1);
|
|
1927
|
+
}
|
|
1928
|
+
hasPendingRepairSweepOptimisticPeer(gid, peer) {
|
|
1929
|
+
return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
|
|
1930
|
+
}
|
|
1931
|
+
createEntryReplicatedForRepair(properties) {
|
|
1932
|
+
const assignedToRangeBoundary = shouldAssignToRangeBoundary(properties.leaders, properties.replicas);
|
|
1933
|
+
const cidObject = cidifyString(properties.entry.hash);
|
|
1934
|
+
const hashNumber = this.indexableDomain.numbers.bytesToNumber(cidObject.multihash.digest);
|
|
1935
|
+
return new this.indexableDomain.constructorEntry({
|
|
1936
|
+
assignedToRangeBoundary,
|
|
1937
|
+
coordinates: properties.coordinates,
|
|
1938
|
+
meta: properties.entry.meta,
|
|
1939
|
+
hash: properties.entry.hash,
|
|
1940
|
+
hashNumber,
|
|
1941
|
+
});
|
|
1942
|
+
}
|
|
1943
|
+
isAssumeSyncedRepairSuppressed() {
|
|
1944
|
+
return this._assumeSyncedRepairSuppressedUntil > Date.now();
|
|
1945
|
+
}
|
|
1946
|
+
isFrontierTrackedRepairMode(mode) {
|
|
1947
|
+
return mode !== "join-warmup";
|
|
1948
|
+
}
|
|
1949
|
+
async sleepTracked(delayMs) {
|
|
1950
|
+
if (delayMs <= 0) {
|
|
1951
|
+
return;
|
|
1952
|
+
}
|
|
1953
|
+
await new Promise((resolve) => {
|
|
1954
|
+
const timer = setTimeout(() => {
|
|
1955
|
+
this._repairRetryTimers.delete(timer);
|
|
1956
|
+
resolve();
|
|
1957
|
+
}, delayMs);
|
|
1958
|
+
timer.unref?.();
|
|
1959
|
+
this._repairRetryTimers.add(timer);
|
|
1960
|
+
});
|
|
1961
|
+
}
|
|
1962
|
+
queueRepairFrontierEntries(mode, target, entries) {
|
|
1963
|
+
let targets = this._repairFrontierByMode.get(mode);
|
|
1964
|
+
if (!targets) {
|
|
1965
|
+
targets = new Map();
|
|
1966
|
+
this._repairFrontierByMode.set(mode, targets);
|
|
1967
|
+
}
|
|
1968
|
+
let pending = targets.get(target);
|
|
1969
|
+
if (!pending) {
|
|
1970
|
+
pending = new Map();
|
|
1971
|
+
targets.set(target, pending);
|
|
1972
|
+
}
|
|
1973
|
+
for (const [hash, entry] of entries) {
|
|
1974
|
+
pending.set(hash, entry);
|
|
1975
|
+
}
|
|
1976
|
+
}
|
|
1977
|
+
clearRepairFrontierHashes(target, hashes) {
|
|
1978
|
+
const hashList = [...hashes];
|
|
1979
|
+
if (hashList.length === 0) {
|
|
1980
|
+
return;
|
|
1981
|
+
}
|
|
1982
|
+
for (const mode of REPAIR_DISPATCH_MODES) {
|
|
1983
|
+
const pending = this._repairFrontierByMode.get(mode)?.get(target);
|
|
1984
|
+
if (!pending) {
|
|
1985
|
+
continue;
|
|
1986
|
+
}
|
|
1987
|
+
for (const hash of hashList) {
|
|
1988
|
+
pending.delete(hash);
|
|
1989
|
+
}
|
|
1990
|
+
if (pending.size === 0) {
|
|
1991
|
+
this._repairFrontierByMode.get(mode)?.delete(target);
|
|
1992
|
+
}
|
|
1993
|
+
}
|
|
1994
|
+
}
|
|
1995
|
+
async getFullReplicaRepairCandidates(extraPeers, options) {
|
|
1996
|
+
const candidates = new Set([
|
|
1997
|
+
this.node.identity.publicKey.hashcode(),
|
|
1998
|
+
]);
|
|
1999
|
+
try {
|
|
2000
|
+
for (const peer of await this.getReplicators()) {
|
|
2001
|
+
candidates.add(peer);
|
|
2002
|
+
}
|
|
2003
|
+
}
|
|
2004
|
+
catch {
|
|
2005
|
+
for (const peer of this.uniqueReplicators) {
|
|
2006
|
+
candidates.add(peer);
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
2009
|
+
for (const peer of extraPeers ?? []) {
|
|
2010
|
+
candidates.add(peer);
|
|
2011
|
+
}
|
|
2012
|
+
if (options?.includeSubscribers !== false) {
|
|
2013
|
+
try {
|
|
2014
|
+
for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
|
|
2015
|
+
candidates.add(subscriber.hashcode());
|
|
2016
|
+
}
|
|
2017
|
+
}
|
|
2018
|
+
catch {
|
|
2019
|
+
// Best-effort only; explicit repair peers still keep the path safe.
|
|
2020
|
+
}
|
|
2021
|
+
}
|
|
2022
|
+
return candidates;
|
|
2023
|
+
}
|
|
2024
|
+
removeRepairFrontierTarget(target) {
|
|
2025
|
+
for (const mode of REPAIR_DISPATCH_MODES) {
|
|
2026
|
+
this._repairFrontierByMode.get(mode)?.delete(target);
|
|
2027
|
+
this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
|
|
2028
|
+
}
|
|
2029
|
+
}
|
|
2030
|
+
async sendRepairConfirmation(target, hashes) {
|
|
2031
|
+
const uniqueHashes = [...new Set(hashes)];
|
|
2032
|
+
for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
|
|
2033
|
+
const chunk = uniqueHashes.slice(i, i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE);
|
|
2034
|
+
await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
|
|
2035
|
+
priority: 1,
|
|
2036
|
+
mode: new SilentDelivery({ to: [target], redundancy: 1 }),
|
|
2037
|
+
});
|
|
2038
|
+
}
|
|
2039
|
+
}
|
|
2040
|
+
async pushRepairEntries(target, entries) {
|
|
2041
|
+
for await (const message of createExchangeHeadsMessages(this.log, [...entries.keys()])) {
|
|
2042
|
+
await this.rpc.send(message, {
|
|
2043
|
+
priority: 1,
|
|
2044
|
+
mode: new SilentDelivery({ to: [target], redundancy: 1 }),
|
|
2045
|
+
});
|
|
2046
|
+
}
|
|
2047
|
+
}
|
|
2048
|
+
async sendRepairEntriesWithTransport(target, entries, transport, options) {
|
|
2049
|
+
const unknownEntries = new Map();
|
|
2050
|
+
const knownHashes = [];
|
|
2051
|
+
for (const [hash, entry] of entries) {
|
|
2052
|
+
if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
|
|
2053
|
+
unknownEntries.set(hash, entry);
|
|
2054
|
+
}
|
|
2055
|
+
else {
|
|
2056
|
+
knownHashes.push(hash);
|
|
2057
|
+
}
|
|
2058
|
+
}
|
|
2059
|
+
this.clearRepairFrontierHashes(target, knownHashes);
|
|
2060
|
+
if (unknownEntries.size === 0) {
|
|
2061
|
+
return;
|
|
2062
|
+
}
|
|
2063
|
+
if (transport === "simple") {
|
|
2064
|
+
// Fallback repair should not depend on the target completing the
|
|
2065
|
+
// RequestMaybeSync -> ResponseMaybeSync round trip.
|
|
2066
|
+
await this.pushRepairEntries(target, unknownEntries);
|
|
2067
|
+
return;
|
|
2068
|
+
}
|
|
2069
|
+
await this.syncronizer.onMaybeMissingEntries({
|
|
2070
|
+
entries: unknownEntries,
|
|
2071
|
+
targets: [target],
|
|
2072
|
+
});
|
|
2073
|
+
}
|
|
2074
|
+
async sendMaybeMissingEntriesNow(target, entries, options) {
|
|
2075
|
+
if (entries.size === 0) {
|
|
2076
|
+
return;
|
|
2077
|
+
}
|
|
2078
|
+
const now = Date.now();
|
|
2079
|
+
let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
|
|
2080
|
+
if (!recentlyDispatchedByHash) {
|
|
2081
|
+
recentlyDispatchedByHash = new Map();
|
|
2082
|
+
this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
|
|
2083
|
+
}
|
|
2084
|
+
for (const [hash, ts] of recentlyDispatchedByHash) {
|
|
2085
|
+
if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
|
|
2086
|
+
recentlyDispatchedByHash.delete(hash);
|
|
2087
|
+
}
|
|
2088
|
+
}
|
|
2089
|
+
const filteredEntries = options.bypassRecentDedupe === true
|
|
2090
|
+
? new Map(entries)
|
|
2091
|
+
: new Map();
|
|
2092
|
+
if (options.bypassRecentDedupe !== true) {
|
|
2093
|
+
for (const [hash, entry] of entries) {
|
|
2094
|
+
const prev = recentlyDispatchedByHash.get(hash);
|
|
2095
|
+
if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
|
|
2096
|
+
continue;
|
|
2097
|
+
}
|
|
2098
|
+
recentlyDispatchedByHash.set(hash, now);
|
|
2099
|
+
filteredEntries.set(hash, entry);
|
|
2100
|
+
}
|
|
2101
|
+
}
|
|
2102
|
+
else {
|
|
2103
|
+
for (const hash of entries.keys()) {
|
|
2104
|
+
recentlyDispatchedByHash.set(hash, now);
|
|
2105
|
+
}
|
|
2106
|
+
}
|
|
2107
|
+
if (filteredEntries.size === 0) {
|
|
2108
|
+
return;
|
|
2109
|
+
}
|
|
2110
|
+
const bucket = this._repairMetrics[options.mode];
|
|
2111
|
+
bucket.dispatches += 1;
|
|
2112
|
+
bucket.entries += filteredEntries.size;
|
|
2113
|
+
if (options.transport === "simple") {
|
|
2114
|
+
bucket.simpleFallbackPasses += 1;
|
|
2115
|
+
}
|
|
2116
|
+
else {
|
|
2117
|
+
bucket.ratelessFirstPasses += 1;
|
|
2118
|
+
}
|
|
2119
|
+
await Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, options.transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
|
|
2120
|
+
}
|
|
2121
|
+
ensureRepairFrontierRunner(mode, target, retryScheduleMs) {
|
|
2122
|
+
const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
|
|
2123
|
+
if (!activeTargets || activeTargets.has(target) || this.closed) {
|
|
2124
|
+
return;
|
|
2125
|
+
}
|
|
2126
|
+
activeTargets.add(target);
|
|
2127
|
+
const retrySchedule = resolveRepairRetrySchedule(mode, retryScheduleMs, this.isFrontierTrackedRepairMode(mode));
|
|
2128
|
+
const steadyStateDelay = retrySchedule.length > 1
|
|
2129
|
+
? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
|
|
2130
|
+
: Math.max(retrySchedule[0] || 1_000, 1_000);
|
|
2131
|
+
void (async () => {
|
|
2132
|
+
let attemptIndex = 0;
|
|
2133
|
+
try {
|
|
2134
|
+
for (;;) {
|
|
2135
|
+
if (this.closed) {
|
|
2136
|
+
return;
|
|
2137
|
+
}
|
|
2138
|
+
const pending = this._repairFrontierByMode.get(mode)?.get(target);
|
|
2139
|
+
if (!pending || pending.size === 0) {
|
|
2140
|
+
return;
|
|
2141
|
+
}
|
|
2142
|
+
if ((mode === "join-warmup" || mode === "join-authoritative") &&
|
|
2143
|
+
this.isAssumeSyncedRepairSuppressed()) {
|
|
2144
|
+
await this.sleepTracked(Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()));
|
|
2145
|
+
continue;
|
|
2146
|
+
}
|
|
2147
|
+
await this.sendMaybeMissingEntriesNow(target, pending, {
|
|
2148
|
+
mode,
|
|
2149
|
+
transport: getRepairTransportForAttempt(mode, attemptIndex),
|
|
2150
|
+
bypassRecentDedupe: true,
|
|
2151
|
+
});
|
|
2152
|
+
const remaining = this._repairFrontierByMode.get(mode)?.get(target);
|
|
2153
|
+
if (!remaining || remaining.size === 0) {
|
|
2154
|
+
return;
|
|
2155
|
+
}
|
|
2156
|
+
const waitMs = attemptIndex + 1 < retrySchedule.length
|
|
2157
|
+
? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
|
|
2158
|
+
: steadyStateDelay;
|
|
2159
|
+
attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
|
|
2160
|
+
await this.sleepTracked(waitMs);
|
|
2161
|
+
}
|
|
2162
|
+
}
|
|
2163
|
+
finally {
|
|
2164
|
+
activeTargets.delete(target);
|
|
2165
|
+
if (!this.closed &&
|
|
2166
|
+
(this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0) {
|
|
2167
|
+
this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
|
|
2168
|
+
}
|
|
2169
|
+
}
|
|
2170
|
+
})().catch((error) => {
|
|
2171
|
+
activeTargets.delete(target);
|
|
2172
|
+
logger.error(error);
|
|
2173
|
+
});
|
|
2174
|
+
}
|
|
2175
|
+
flushAppendBackfill() {
|
|
2176
|
+
if (this._appendBackfillPendingByTarget.size === 0) {
|
|
2177
|
+
return;
|
|
2178
|
+
}
|
|
2179
|
+
const pending = this._appendBackfillPendingByTarget;
|
|
2180
|
+
this._appendBackfillPendingByTarget = new Map();
|
|
2181
|
+
for (const [target, entries] of pending) {
|
|
2182
|
+
this.dispatchMaybeMissingEntries(target, entries, {
|
|
2183
|
+
mode: "append-backfill",
|
|
2184
|
+
});
|
|
2185
|
+
}
|
|
2186
|
+
}
|
|
2187
|
+
queueAppendBackfill(target, entry) {
|
|
2188
|
+
let entries = this._appendBackfillPendingByTarget.get(target);
|
|
2189
|
+
if (!entries) {
|
|
2190
|
+
entries = new Map();
|
|
2191
|
+
this._appendBackfillPendingByTarget.set(target, entries);
|
|
2192
|
+
}
|
|
2193
|
+
entries.set(entry.hash, entry);
|
|
2194
|
+
if (entries.size >= this.repairSweepTargetBufferSize) {
|
|
2195
|
+
this.flushAppendBackfill();
|
|
2196
|
+
return;
|
|
2197
|
+
}
|
|
2198
|
+
if (this._appendBackfillTimer || this.closed) {
|
|
2199
|
+
return;
|
|
2200
|
+
}
|
|
2201
|
+
const timer = setTimeout(() => {
|
|
2202
|
+
this._repairRetryTimers.delete(timer);
|
|
2203
|
+
if (this._appendBackfillTimer === timer) {
|
|
2204
|
+
this._appendBackfillTimer = undefined;
|
|
2205
|
+
}
|
|
2206
|
+
if (this.closed) {
|
|
2207
|
+
return;
|
|
2208
|
+
}
|
|
2209
|
+
this.flushAppendBackfill();
|
|
2210
|
+
}, APPEND_BACKFILL_DELAY_MS);
|
|
2211
|
+
timer.unref?.();
|
|
2212
|
+
this._repairRetryTimers.add(timer);
|
|
2213
|
+
this._appendBackfillTimer = timer;
|
|
2214
|
+
}
|
|
1747
2215
|
dispatchMaybeMissingEntries(target, entries, options) {
|
|
1748
2216
|
if (entries.size === 0) {
|
|
1749
2217
|
return;
|
|
1750
2218
|
}
|
|
2219
|
+
if (this.isFrontierTrackedRepairMode(options.mode)) {
|
|
2220
|
+
this.queueRepairFrontierEntries(options.mode, target, entries);
|
|
2221
|
+
this.ensureRepairFrontierRunner(options.mode, target, options.retryScheduleMs);
|
|
2222
|
+
return;
|
|
2223
|
+
}
|
|
1751
2224
|
const now = Date.now();
|
|
1752
2225
|
let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
|
|
1753
2226
|
if (!recentlyDispatchedByHash) {
|
|
@@ -1759,10 +2232,10 @@ let SharedLog = (() => {
|
|
|
1759
2232
|
recentlyDispatchedByHash.delete(hash);
|
|
1760
2233
|
}
|
|
1761
2234
|
}
|
|
1762
|
-
const filteredEntries = options
|
|
2235
|
+
const filteredEntries = options.bypassRecentDedupe === true
|
|
1763
2236
|
? new Map(entries)
|
|
1764
2237
|
: new Map();
|
|
1765
|
-
if (options
|
|
2238
|
+
if (options.bypassRecentDedupe !== true) {
|
|
1766
2239
|
for (const [hash, entry] of entries) {
|
|
1767
2240
|
const prev = recentlyDispatchedByHash.get(hash);
|
|
1768
2241
|
if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
|
|
@@ -1780,95 +2253,186 @@ let SharedLog = (() => {
|
|
|
1780
2253
|
if (filteredEntries.size === 0) {
|
|
1781
2254
|
return;
|
|
1782
2255
|
}
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
const
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
if (
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
}
|
|
1800
|
-
return Promise.resolve(this.syncronizer.onMaybeMissingEntries({
|
|
1801
|
-
entries: filteredEntries,
|
|
1802
|
-
targets: [target],
|
|
1803
|
-
})).catch((error) => logger.error(error));
|
|
2256
|
+
if ((options.mode === "join-warmup" ||
|
|
2257
|
+
options.mode === "join-authoritative") &&
|
|
2258
|
+
this.isAssumeSyncedRepairSuppressed()) {
|
|
2259
|
+
return;
|
|
2260
|
+
}
|
|
2261
|
+
const retrySchedule = resolveRepairRetrySchedule(options.mode, options.retryScheduleMs, this.isFrontierTrackedRepairMode(options.mode));
|
|
2262
|
+
const bucket = this._repairMetrics[options.mode];
|
|
2263
|
+
bucket.dispatches += 1;
|
|
2264
|
+
bucket.entries += filteredEntries.size;
|
|
2265
|
+
const run = (transport) => {
|
|
2266
|
+
if (transport === "simple") {
|
|
2267
|
+
bucket.simpleFallbackPasses += 1;
|
|
2268
|
+
}
|
|
2269
|
+
else {
|
|
2270
|
+
bucket.ratelessFirstPasses += 1;
|
|
2271
|
+
}
|
|
2272
|
+
return Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
|
|
1804
2273
|
};
|
|
1805
|
-
|
|
2274
|
+
retrySchedule.forEach((delayMs, index) => {
|
|
2275
|
+
const transport = getRepairTransportForAttempt(options.mode, index);
|
|
1806
2276
|
if (delayMs === 0) {
|
|
1807
|
-
void run();
|
|
1808
|
-
|
|
2277
|
+
void run(transport);
|
|
2278
|
+
return;
|
|
1809
2279
|
}
|
|
1810
2280
|
const timer = setTimeout(() => {
|
|
1811
2281
|
this._repairRetryTimers.delete(timer);
|
|
1812
2282
|
if (this.closed) {
|
|
1813
2283
|
return;
|
|
1814
2284
|
}
|
|
1815
|
-
void run();
|
|
2285
|
+
void run(transport);
|
|
1816
2286
|
}, delayMs);
|
|
1817
2287
|
timer.unref?.();
|
|
1818
2288
|
this._repairRetryTimers.add(timer);
|
|
1819
|
-
}
|
|
2289
|
+
});
|
|
1820
2290
|
}
|
|
1821
2291
|
scheduleRepairSweep(options) {
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
2292
|
+
this._repairSweepPendingModes.add(options.mode);
|
|
2293
|
+
const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
|
|
2294
|
+
if (pendingPeers) {
|
|
2295
|
+
for (const peer of options.peers ?? []) {
|
|
2296
|
+
pendingPeers.add(peer);
|
|
2297
|
+
}
|
|
1827
2298
|
}
|
|
1828
2299
|
if (!this._repairSweepRunning && !this.closed) {
|
|
1829
2300
|
this._repairSweepRunning = true;
|
|
1830
2301
|
void this.runRepairSweep();
|
|
1831
2302
|
}
|
|
1832
2303
|
}
|
|
2304
|
+
scheduleJoinAuthoritativeRepair(peers) {
|
|
2305
|
+
if (this.closed || peers.size === 0) {
|
|
2306
|
+
return;
|
|
2307
|
+
}
|
|
2308
|
+
for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
|
|
2309
|
+
let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
|
|
2310
|
+
if (!pendingPeers) {
|
|
2311
|
+
pendingPeers = new Set();
|
|
2312
|
+
this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
|
|
2313
|
+
}
|
|
2314
|
+
for (const peer of peers) {
|
|
2315
|
+
pendingPeers.add(peer);
|
|
2316
|
+
}
|
|
2317
|
+
if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
|
|
2318
|
+
continue;
|
|
2319
|
+
}
|
|
2320
|
+
const timer = setTimeout(() => {
|
|
2321
|
+
this._repairRetryTimers.delete(timer);
|
|
2322
|
+
this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
|
|
2323
|
+
if (this.closed) {
|
|
2324
|
+
return;
|
|
2325
|
+
}
|
|
2326
|
+
const peersForSweep = new Set(this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? []);
|
|
2327
|
+
this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
|
|
2328
|
+
if (peersForSweep.size === 0) {
|
|
2329
|
+
return;
|
|
2330
|
+
}
|
|
2331
|
+
// A joiner's leader view can still be partial on the first delayed pass
|
|
2332
|
+
// under pubsub jitter. Bounded per-peer rescans widen the authoritative
|
|
2333
|
+
// frontier without adding per-append sweeps.
|
|
2334
|
+
this.scheduleRepairSweep({
|
|
2335
|
+
mode: "join-authoritative",
|
|
2336
|
+
peers: peersForSweep,
|
|
2337
|
+
});
|
|
2338
|
+
}, delayMs);
|
|
2339
|
+
timer.unref?.();
|
|
2340
|
+
this._repairRetryTimers.add(timer);
|
|
2341
|
+
this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
|
|
2342
|
+
}
|
|
2343
|
+
}
|
|
1833
2344
|
async runRepairSweep() {
|
|
1834
2345
|
try {
|
|
1835
2346
|
while (!this.closed) {
|
|
1836
|
-
const
|
|
1837
|
-
const
|
|
1838
|
-
this.
|
|
1839
|
-
this.
|
|
1840
|
-
|
|
2347
|
+
const pendingModes = new Set(this._repairSweepPendingModes);
|
|
2348
|
+
const pendingPeersByMode = cloneRepairPendingPeersByMode(this._repairSweepPendingPeersByMode);
|
|
2349
|
+
this._repairSweepPendingModes.clear();
|
|
2350
|
+
for (const peers of this._repairSweepPendingPeersByMode.values()) {
|
|
2351
|
+
peers.clear();
|
|
2352
|
+
}
|
|
2353
|
+
if (pendingModes.size === 0) {
|
|
1841
2354
|
return;
|
|
1842
2355
|
}
|
|
1843
|
-
const
|
|
1844
|
-
const
|
|
1845
|
-
|
|
2356
|
+
const optimisticGidPeersByMode = new Map();
|
|
2357
|
+
const optimisticGidPeersConsumedByMode = new Map();
|
|
2358
|
+
for (const mode of pendingModes) {
|
|
2359
|
+
const modePeers = pendingPeersByMode.get(mode);
|
|
2360
|
+
if (!modePeers || modePeers.size === 0) {
|
|
2361
|
+
continue;
|
|
2362
|
+
}
|
|
2363
|
+
const optimisticGidPeers = new Map();
|
|
2364
|
+
const optimisticGidPeersConsumed = new Map();
|
|
2365
|
+
for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
|
|
2366
|
+
let matchedPeers;
|
|
2367
|
+
let matchedCounts;
|
|
2368
|
+
for (const [peer, count] of peerCounts) {
|
|
2369
|
+
if (!modePeers.has(peer)) {
|
|
2370
|
+
continue;
|
|
2371
|
+
}
|
|
2372
|
+
matchedPeers ||= new Set();
|
|
2373
|
+
matchedCounts ||= new Map();
|
|
2374
|
+
matchedPeers.add(peer);
|
|
2375
|
+
matchedCounts.set(peer, count);
|
|
2376
|
+
}
|
|
2377
|
+
if (matchedPeers && matchedCounts) {
|
|
2378
|
+
optimisticGidPeers.set(gid, matchedPeers);
|
|
2379
|
+
optimisticGidPeersConsumed.set(gid, matchedCounts);
|
|
2380
|
+
}
|
|
2381
|
+
}
|
|
2382
|
+
if (optimisticGidPeers.size > 0) {
|
|
2383
|
+
optimisticGidPeersByMode.set(mode, optimisticGidPeers);
|
|
2384
|
+
optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
|
|
2385
|
+
}
|
|
2386
|
+
}
|
|
2387
|
+
const pendingByMode = new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
|
|
2388
|
+
const pendingRepairPeers = new Set();
|
|
2389
|
+
for (const peers of pendingPeersByMode.values()) {
|
|
2390
|
+
for (const peer of peers) {
|
|
2391
|
+
pendingRepairPeers.add(peer);
|
|
2392
|
+
}
|
|
2393
|
+
}
|
|
2394
|
+
const fullReplicaRepairCandidates = await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
|
|
2395
|
+
includeSubscribers: false,
|
|
2396
|
+
});
|
|
2397
|
+
const fullReplicaRepairCandidateCount = Math.max(1, fullReplicaRepairCandidates.size);
|
|
2398
|
+
const nextFrontierByMode = new Map([
|
|
2399
|
+
["join-authoritative", new Map()],
|
|
2400
|
+
["churn", new Map()],
|
|
2401
|
+
]);
|
|
2402
|
+
const flushTarget = (mode, target) => {
|
|
2403
|
+
const targets = pendingByMode.get(mode);
|
|
2404
|
+
const entries = targets?.get(target);
|
|
1846
2405
|
if (!entries || entries.size === 0) {
|
|
1847
2406
|
return;
|
|
1848
2407
|
}
|
|
1849
|
-
const isJoinWarmupTarget = addedPeers.has(target);
|
|
1850
|
-
const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
|
|
1851
2408
|
this.dispatchMaybeMissingEntries(target, entries, {
|
|
1852
|
-
bypassRecentDedupe,
|
|
1853
|
-
|
|
1854
|
-
? JOIN_WARMUP_RETRY_SCHEDULE_MS
|
|
1855
|
-
: undefined,
|
|
1856
|
-
forceFreshDelivery,
|
|
2409
|
+
bypassRecentDedupe: true,
|
|
2410
|
+
mode,
|
|
1857
2411
|
});
|
|
1858
|
-
|
|
2412
|
+
targets?.delete(target);
|
|
1859
2413
|
};
|
|
1860
|
-
const queueEntryForTarget = (target, entry) => {
|
|
1861
|
-
|
|
2414
|
+
const queueEntryForTarget = (mode, target, entry) => {
|
|
2415
|
+
const sweepTargets = nextFrontierByMode.get(mode);
|
|
2416
|
+
if (sweepTargets) {
|
|
2417
|
+
let sweepSet = sweepTargets.get(target);
|
|
2418
|
+
if (!sweepSet) {
|
|
2419
|
+
sweepSet = new Map();
|
|
2420
|
+
sweepTargets.set(target, sweepSet);
|
|
2421
|
+
}
|
|
2422
|
+
sweepSet.set(entry.hash, entry);
|
|
2423
|
+
}
|
|
2424
|
+
const targets = pendingByMode.get(mode);
|
|
2425
|
+
let set = targets.get(target);
|
|
1862
2426
|
if (!set) {
|
|
1863
2427
|
set = new Map();
|
|
1864
|
-
|
|
2428
|
+
targets.set(target, set);
|
|
1865
2429
|
}
|
|
1866
2430
|
if (set.has(entry.hash)) {
|
|
1867
2431
|
return;
|
|
1868
2432
|
}
|
|
1869
2433
|
set.set(entry.hash, entry);
|
|
1870
2434
|
if (set.size >= this.repairSweepTargetBufferSize) {
|
|
1871
|
-
flushTarget(target);
|
|
2435
|
+
flushTarget(mode, target);
|
|
1872
2436
|
}
|
|
1873
2437
|
};
|
|
1874
2438
|
const iterator = this.entryCoordinatesIndex.iterate({});
|
|
@@ -1877,20 +2441,42 @@ let SharedLog = (() => {
|
|
|
1877
2441
|
const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
|
|
1878
2442
|
for (const entry of entries) {
|
|
1879
2443
|
const entryReplicated = entry.value;
|
|
1880
|
-
const
|
|
2444
|
+
const gid = entryReplicated.gid;
|
|
2445
|
+
const knownPeers = this._gidPeersHistory.get(gid);
|
|
2446
|
+
const requestedReplicas = decodeReplicas(entryReplicated).getValue(this);
|
|
1881
2447
|
const currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
|
|
1882
|
-
if (
|
|
2448
|
+
if (pendingModes.has("churn")) {
|
|
1883
2449
|
for (const [currentPeer] of currentPeers) {
|
|
1884
2450
|
if (currentPeer === this.node.identity.publicKey.hashcode()) {
|
|
1885
2451
|
continue;
|
|
1886
2452
|
}
|
|
1887
|
-
queueEntryForTarget(currentPeer, entryReplicated);
|
|
2453
|
+
queueEntryForTarget("churn", currentPeer, entryReplicated);
|
|
1888
2454
|
}
|
|
1889
2455
|
}
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
2456
|
+
for (const mode of pendingModes) {
|
|
2457
|
+
const modePeers = pendingPeersByMode.get(mode);
|
|
2458
|
+
if (!modePeers || modePeers.size === 0) {
|
|
2459
|
+
continue;
|
|
2460
|
+
}
|
|
2461
|
+
const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
|
|
2462
|
+
for (const peer of modePeers) {
|
|
2463
|
+
if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
|
|
2464
|
+
continue;
|
|
2465
|
+
}
|
|
2466
|
+
const wasOptimisticallyAssigned = optimisticPeers?.has(peer) === true;
|
|
2467
|
+
const isCoveredByFullReplicaRepair = mode === "join-authoritative" &&
|
|
2468
|
+
fullReplicaRepairCandidates.has(peer) &&
|
|
2469
|
+
requestedReplicas >= fullReplicaRepairCandidateCount;
|
|
2470
|
+
const shouldQueue = mode === "join-authoritative"
|
|
2471
|
+
? currentPeers.has(peer) || isCoveredByFullReplicaRepair
|
|
2472
|
+
: wasOptimisticallyAssigned ||
|
|
2473
|
+
(currentPeers.has(peer) && !knownPeers?.has(peer));
|
|
2474
|
+
if (shouldQueue) {
|
|
2475
|
+
// Authoritative join repair must not trust partial gid peer history,
|
|
2476
|
+
// otherwise a late joiner can get stuck with a partial historical
|
|
2477
|
+
// backfill forever. Once we enter the authoritative pass, queue every
|
|
2478
|
+
// entry whose current leader set still includes the added peer.
|
|
2479
|
+
queueEntryForTarget(mode, peer, entryReplicated);
|
|
1894
2480
|
}
|
|
1895
2481
|
}
|
|
1896
2482
|
}
|
|
@@ -1900,8 +2486,64 @@ let SharedLog = (() => {
|
|
|
1900
2486
|
finally {
|
|
1901
2487
|
await iterator.close();
|
|
1902
2488
|
}
|
|
1903
|
-
for (const
|
|
1904
|
-
|
|
2489
|
+
for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
|
|
2490
|
+
for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
|
|
2491
|
+
const pendingPeerCounts = this._repairSweepOptimisticGidPeersPending.get(gid);
|
|
2492
|
+
if (!pendingPeerCounts) {
|
|
2493
|
+
continue;
|
|
2494
|
+
}
|
|
2495
|
+
for (const [peer, count] of peerCounts) {
|
|
2496
|
+
const current = pendingPeerCounts.get(peer) || 0;
|
|
2497
|
+
const next = current - count;
|
|
2498
|
+
if (next > 0) {
|
|
2499
|
+
pendingPeerCounts.set(peer, next);
|
|
2500
|
+
}
|
|
2501
|
+
else {
|
|
2502
|
+
pendingPeerCounts.delete(peer);
|
|
2503
|
+
}
|
|
2504
|
+
}
|
|
2505
|
+
if (pendingPeerCounts.size === 0) {
|
|
2506
|
+
this._repairSweepOptimisticGidPeersPending.delete(gid);
|
|
2507
|
+
}
|
|
2508
|
+
}
|
|
2509
|
+
}
|
|
2510
|
+
for (const mode of pendingModes) {
|
|
2511
|
+
if (mode !== "join-authoritative" && mode !== "churn") {
|
|
2512
|
+
continue;
|
|
2513
|
+
}
|
|
2514
|
+
const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
|
|
2515
|
+
const frontierTargets = this._repairFrontierByMode.get(mode);
|
|
2516
|
+
for (const target of pendingPeersByMode.get(mode) ?? []) {
|
|
2517
|
+
const replacement = nextTargets.get(target);
|
|
2518
|
+
if (mode === "join-authoritative") {
|
|
2519
|
+
// Authoritative join repair is receipt-driven: a later sweep can have a
|
|
2520
|
+
// narrower transient leader view, but it must not forget unconfirmed
|
|
2521
|
+
// hashes that were already queued for this joiner.
|
|
2522
|
+
if (replacement && replacement.size > 0) {
|
|
2523
|
+
const existing = frontierTargets?.get(target);
|
|
2524
|
+
if (existing && existing.size > 0) {
|
|
2525
|
+
for (const [hash, entry] of replacement) {
|
|
2526
|
+
existing.set(hash, entry);
|
|
2527
|
+
}
|
|
2528
|
+
}
|
|
2529
|
+
else {
|
|
2530
|
+
frontierTargets?.set(target, replacement);
|
|
2531
|
+
}
|
|
2532
|
+
}
|
|
2533
|
+
continue;
|
|
2534
|
+
}
|
|
2535
|
+
if (replacement && replacement.size > 0) {
|
|
2536
|
+
frontierTargets?.set(target, replacement);
|
|
2537
|
+
}
|
|
2538
|
+
else {
|
|
2539
|
+
frontierTargets?.delete(target);
|
|
2540
|
+
}
|
|
2541
|
+
}
|
|
2542
|
+
}
|
|
2543
|
+
for (const [mode, targets] of pendingByMode) {
|
|
2544
|
+
for (const target of [...targets.keys()]) {
|
|
2545
|
+
flushTarget(mode, target);
|
|
2546
|
+
}
|
|
1905
2547
|
}
|
|
1906
2548
|
}
|
|
1907
2549
|
}
|
|
@@ -1912,17 +2554,78 @@ let SharedLog = (() => {
|
|
|
1912
2554
|
}
|
|
1913
2555
|
finally {
|
|
1914
2556
|
this._repairSweepRunning = false;
|
|
1915
|
-
if (!this.closed &&
|
|
1916
|
-
(this._repairSweepForceFreshPending ||
|
|
1917
|
-
this._repairSweepAddedPeersPending.size > 0)) {
|
|
2557
|
+
if (!this.closed && this._repairSweepPendingModes.size > 0) {
|
|
1918
2558
|
this._repairSweepRunning = true;
|
|
1919
2559
|
void this.runRepairSweep();
|
|
1920
2560
|
}
|
|
1921
2561
|
}
|
|
1922
2562
|
}
|
|
1923
2563
|
async pruneDebouncedFnAddIfNotKeeping(args) {
|
|
1924
|
-
if (
|
|
1925
|
-
return
|
|
2564
|
+
if (this.keep && (await this.keep(args.value.entry))) {
|
|
2565
|
+
return false;
|
|
2566
|
+
}
|
|
2567
|
+
void this.pruneDebouncedFn.add(args);
|
|
2568
|
+
return true;
|
|
2569
|
+
}
|
|
2570
|
+
async pruneJoinedEntriesNoLongerLed(entries) {
|
|
2571
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2572
|
+
for (const entry of entries) {
|
|
2573
|
+
if (this.closed || this._pendingDeletes.has(entry.hash)) {
|
|
2574
|
+
continue;
|
|
2575
|
+
}
|
|
2576
|
+
const leaders = await this.findLeadersFromEntry(entry, decodeReplicas(entry).getValue(this), { roleAge: 0 });
|
|
2577
|
+
if (leaders.has(selfHash)) {
|
|
2578
|
+
this.pruneDebouncedFn.delete(entry.hash);
|
|
2579
|
+
continue;
|
|
2580
|
+
}
|
|
2581
|
+
if (leaders.size === 0) {
|
|
2582
|
+
continue;
|
|
2583
|
+
}
|
|
2584
|
+
await this.pruneDebouncedFnAddIfNotKeeping({
|
|
2585
|
+
key: entry.hash,
|
|
2586
|
+
value: { entry, leaders },
|
|
2587
|
+
});
|
|
2588
|
+
this.responseToPruneDebouncedFn.delete(entry.hash);
|
|
2589
|
+
}
|
|
2590
|
+
}
|
|
2591
|
+
async pruneIndexedEntriesNoLongerLed() {
|
|
2592
|
+
const selfHash = this.node.identity.publicKey.hashcode();
|
|
2593
|
+
const iterator = this.entryCoordinatesIndex.iterate({});
|
|
2594
|
+
let enqueuedPrune = false;
|
|
2595
|
+
try {
|
|
2596
|
+
while (!this.closed && !iterator.done()) {
|
|
2597
|
+
const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
|
|
2598
|
+
for (const entry of entries) {
|
|
2599
|
+
const entryReplicated = entry.value;
|
|
2600
|
+
if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
|
|
2601
|
+
continue;
|
|
2602
|
+
}
|
|
2603
|
+
const leaders = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
|
|
2604
|
+
if (leaders.has(selfHash)) {
|
|
2605
|
+
this.pruneDebouncedFn.delete(entryReplicated.hash);
|
|
2606
|
+
await this._pendingDeletes
|
|
2607
|
+
.get(entryReplicated.hash)
|
|
2608
|
+
?.reject(new Error("Failed to delete, is leader again"));
|
|
2609
|
+
this.removePruneRequestSent(entryReplicated.hash);
|
|
2610
|
+
continue;
|
|
2611
|
+
}
|
|
2612
|
+
if (leaders.size === 0) {
|
|
2613
|
+
continue;
|
|
2614
|
+
}
|
|
2615
|
+
enqueuedPrune =
|
|
2616
|
+
(await this.pruneDebouncedFnAddIfNotKeeping({
|
|
2617
|
+
key: entryReplicated.hash,
|
|
2618
|
+
value: { entry: entryReplicated, leaders },
|
|
2619
|
+
})) || enqueuedPrune;
|
|
2620
|
+
this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
|
|
2621
|
+
}
|
|
2622
|
+
}
|
|
2623
|
+
}
|
|
2624
|
+
finally {
|
|
2625
|
+
await iterator.close();
|
|
2626
|
+
}
|
|
2627
|
+
if (enqueuedPrune && !this.closed) {
|
|
2628
|
+
await this.pruneDebouncedFn.flush();
|
|
1926
2629
|
}
|
|
1927
2630
|
}
|
|
1928
2631
|
clearCheckedPruneRetry(hash) {
|
|
@@ -2065,16 +2768,17 @@ let SharedLog = (() => {
|
|
|
2065
2768
|
await this._appendDeliverToAllFanout(result.entry);
|
|
2066
2769
|
}
|
|
2067
2770
|
else {
|
|
2068
|
-
await this._appendDeliverToReplicators(result.entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
|
|
2771
|
+
await this._appendDeliverToReplicators(result.entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
|
|
2069
2772
|
}
|
|
2070
2773
|
}
|
|
2071
|
-
|
|
2774
|
+
const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
|
|
2775
|
+
if (!isLeader && !delayAdaptiveRebalance) {
|
|
2072
2776
|
this.pruneDebouncedFnAddIfNotKeeping({
|
|
2073
2777
|
key: result.entry.hash,
|
|
2074
2778
|
value: { entry: result.entry, leaders },
|
|
2075
2779
|
});
|
|
2076
2780
|
}
|
|
2077
|
-
if (!
|
|
2781
|
+
if (!delayAdaptiveRebalance) {
|
|
2078
2782
|
this.rebalanceParticipationDebounced?.call();
|
|
2079
2783
|
}
|
|
2080
2784
|
return result;
|
|
@@ -2108,8 +2812,18 @@ let SharedLog = (() => {
|
|
|
2108
2812
|
this._repairRetryTimers = new Set();
|
|
2109
2813
|
this._recentRepairDispatch = new Map();
|
|
2110
2814
|
this._repairSweepRunning = false;
|
|
2111
|
-
this.
|
|
2112
|
-
this.
|
|
2815
|
+
this._repairSweepPendingModes = new Set();
|
|
2816
|
+
this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
|
|
2817
|
+
this._repairFrontierByMode = createRepairFrontierByMode();
|
|
2818
|
+
this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
|
|
2819
|
+
this._repairSweepOptimisticGidPeersPending = new Map();
|
|
2820
|
+
this._entryKnownPeers = new Map();
|
|
2821
|
+
this._joinAuthoritativeRepairTimersByDelay = new Map();
|
|
2822
|
+
this._joinAuthoritativeRepairPeersByDelay = new Map();
|
|
2823
|
+
this._assumeSyncedRepairSuppressedUntil = 0;
|
|
2824
|
+
this._appendBackfillTimer = undefined;
|
|
2825
|
+
this._appendBackfillPendingByTarget = new Map();
|
|
2826
|
+
this._repairMetrics = createRepairMetrics();
|
|
2113
2827
|
this._topicSubscribersCache = new Map();
|
|
2114
2828
|
this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
|
|
2115
2829
|
this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
|
|
@@ -2167,7 +2881,10 @@ let SharedLog = (() => {
|
|
|
2167
2881
|
this.keep = options?.keep;
|
|
2168
2882
|
this.pendingMaturity = new Map();
|
|
2169
2883
|
const id = sha256Base64Sync(this.log.id);
|
|
2170
|
-
const storage = await
|
|
2884
|
+
const [storage, logScope] = await Promise.all([
|
|
2885
|
+
this.node.storage.sublevel(id),
|
|
2886
|
+
this.node.indexer.scope(id),
|
|
2887
|
+
]);
|
|
2171
2888
|
const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
|
|
2172
2889
|
const fanoutService = getSharedLogFanoutService(this.node.services);
|
|
2173
2890
|
const blockProviderNamespace = (cid) => `cid:${cid}`;
|
|
@@ -2223,16 +2940,18 @@ let SharedLog = (() => {
|
|
|
2223
2940
|
}
|
|
2224
2941
|
},
|
|
2225
2942
|
});
|
|
2226
|
-
|
|
2227
|
-
const
|
|
2228
|
-
|
|
2943
|
+
const remoteBlocksStartPromise = this.remoteBlocks.start();
|
|
2944
|
+
const [replicationIndex, logIndex] = await Promise.all([
|
|
2945
|
+
logScope.scope("replication"),
|
|
2946
|
+
logScope.scope("log"),
|
|
2947
|
+
]);
|
|
2229
2948
|
this._replicationRangeIndex = await replicationIndex.init({
|
|
2230
2949
|
schema: this.indexableDomain.constructorRange,
|
|
2231
2950
|
});
|
|
2232
2951
|
this._entryCoordinatesIndex = await replicationIndex.init({
|
|
2233
2952
|
schema: this.indexableDomain.constructorEntry,
|
|
2234
2953
|
});
|
|
2235
|
-
|
|
2954
|
+
await remoteBlocksStartPromise;
|
|
2236
2955
|
const hasIndexedReplicationInfo = (await this.replicationIndex.count({
|
|
2237
2956
|
query: [
|
|
2238
2957
|
new StringMatch({
|
|
@@ -2360,27 +3079,33 @@ let SharedLog = (() => {
|
|
|
2360
3079
|
}
|
|
2361
3080
|
}
|
|
2362
3081
|
// Open for communication
|
|
2363
|
-
await this.rpc.open({
|
|
2364
|
-
queryType: TransportMessage,
|
|
2365
|
-
responseType: TransportMessage,
|
|
2366
|
-
responseHandler: (query, context) => this.onMessage(query, context),
|
|
2367
|
-
topic: this.topic,
|
|
2368
|
-
});
|
|
2369
3082
|
this._onSubscriptionFn =
|
|
2370
3083
|
this._onSubscriptionFn || this._onSubscription.bind(this);
|
|
2371
|
-
await this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn);
|
|
2372
3084
|
this._onUnsubscriptionFn =
|
|
2373
3085
|
this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
|
|
2374
|
-
await
|
|
2375
|
-
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
3086
|
+
await Promise.all([
|
|
3087
|
+
this.rpc.open({
|
|
3088
|
+
queryType: TransportMessage,
|
|
3089
|
+
responseType: TransportMessage,
|
|
3090
|
+
responseHandler: (query, context) => this.onMessage(query, context),
|
|
3091
|
+
topic: this.topic,
|
|
3092
|
+
}),
|
|
3093
|
+
this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn),
|
|
3094
|
+
this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn),
|
|
3095
|
+
]);
|
|
3096
|
+
const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
|
|
3097
|
+
// Mark previously-owned replication ranges as "new" only when they already exist.
|
|
3098
|
+
// Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
|
|
3099
|
+
const updateOwnedReplicationPromise = hasIndexedReplicationInfo
|
|
3100
|
+
? this.updateTimestampOfOwnedReplicationRanges()
|
|
3101
|
+
: Promise.resolve();
|
|
3102
|
+
await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
|
|
2379
3103
|
// if we had a previous session with replication info, and new replication info dictates that we unreplicate
|
|
2380
3104
|
// we should do that. Otherwise if options is a unreplication we dont need to do anything because
|
|
2381
3105
|
// we are already unreplicated (as we are just opening)
|
|
2382
|
-
|
|
2383
|
-
const canResumeReplication =
|
|
3106
|
+
const isUnreplicationOptionsDefined = isUnreplicationOptions(options?.replicate);
|
|
3107
|
+
const canResumeReplication = hasIndexedReplicationInfo &&
|
|
3108
|
+
(await isReplicationOptionsDependentOnPreviousState(options?.replicate, this.replicationIndex, this.node.identity.publicKey));
|
|
2384
3109
|
if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
|
|
2385
3110
|
await this.replicate(options?.replicate, { checkDuplicates: true });
|
|
2386
3111
|
}
|
|
@@ -2423,6 +3148,7 @@ let SharedLog = (() => {
|
|
|
2423
3148
|
}
|
|
2424
3149
|
async afterOpen() {
|
|
2425
3150
|
await super.afterOpen();
|
|
3151
|
+
const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
|
|
2426
3152
|
// We do this here, because these calls requires this.closed == false
|
|
2427
3153
|
void this.pruneOfflineReplicators()
|
|
2428
3154
|
.then(() => {
|
|
@@ -2437,7 +3163,7 @@ let SharedLog = (() => {
|
|
|
2437
3163
|
this.startReplicatorLivenessSweep();
|
|
2438
3164
|
await this.rebalanceParticipation();
|
|
2439
3165
|
// Take into account existing subscription
|
|
2440
|
-
(await
|
|
3166
|
+
(await existingSubscribersPromise)?.forEach((v) => {
|
|
2441
3167
|
if (v.equals(this.node.identity.publicKey)) {
|
|
2442
3168
|
return;
|
|
2443
3169
|
}
|
|
@@ -2952,8 +3678,28 @@ let SharedLog = (() => {
|
|
|
2952
3678
|
this._repairRetryTimers.clear();
|
|
2953
3679
|
this._recentRepairDispatch.clear();
|
|
2954
3680
|
this._repairSweepRunning = false;
|
|
2955
|
-
this.
|
|
2956
|
-
this.
|
|
3681
|
+
this._repairSweepPendingModes.clear();
|
|
3682
|
+
for (const peers of this._repairSweepPendingPeersByMode.values()) {
|
|
3683
|
+
peers.clear();
|
|
3684
|
+
}
|
|
3685
|
+
this._repairSweepOptimisticGidPeersPending.clear();
|
|
3686
|
+
this._entryKnownPeers.clear();
|
|
3687
|
+
for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
|
|
3688
|
+
clearTimeout(timer);
|
|
3689
|
+
}
|
|
3690
|
+
this._joinAuthoritativeRepairTimersByDelay.clear();
|
|
3691
|
+
this._joinAuthoritativeRepairPeersByDelay.clear();
|
|
3692
|
+
for (const targets of this._repairFrontierByMode.values()) {
|
|
3693
|
+
targets.clear();
|
|
3694
|
+
}
|
|
3695
|
+
for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
|
|
3696
|
+
targets.clear();
|
|
3697
|
+
}
|
|
3698
|
+
if (this._appendBackfillTimer) {
|
|
3699
|
+
clearTimeout(this._appendBackfillTimer);
|
|
3700
|
+
this._appendBackfillTimer = undefined;
|
|
3701
|
+
}
|
|
3702
|
+
this._appendBackfillPendingByTarget.clear();
|
|
2957
3703
|
for (const [_k, v] of this._pendingDeletes) {
|
|
2958
3704
|
v.clear();
|
|
2959
3705
|
v.promise.resolve(); // TODO or reject?
|
|
@@ -3114,6 +3860,7 @@ let SharedLog = (() => {
|
|
|
3114
3860
|
logger.trace(`${this.node.identity.publicKey.hashcode()}: Recieved heads: ${heads.length === 1 ? heads[0].entry.hash : "#" + heads.length}, logId: ${this.log.idString}`);
|
|
3115
3861
|
if (heads) {
|
|
3116
3862
|
const filteredHeads = [];
|
|
3863
|
+
const confirmedHashes = new Set();
|
|
3117
3864
|
for (const head of heads) {
|
|
3118
3865
|
if (!(await this.log.has(head.entry.hash))) {
|
|
3119
3866
|
head.entry.init({
|
|
@@ -3123,8 +3870,18 @@ let SharedLog = (() => {
|
|
|
3123
3870
|
});
|
|
3124
3871
|
filteredHeads.push(head);
|
|
3125
3872
|
}
|
|
3873
|
+
else {
|
|
3874
|
+
confirmedHashes.add(head.entry.hash);
|
|
3875
|
+
}
|
|
3876
|
+
}
|
|
3877
|
+
const fromIsSelf = context.from.equals(this.node.identity.publicKey);
|
|
3878
|
+
if (!fromIsSelf) {
|
|
3879
|
+
this.markEntriesKnownByPeer(heads.map((head) => head.entry.hash), context.from.hashcode());
|
|
3126
3880
|
}
|
|
3127
3881
|
if (filteredHeads.length === 0) {
|
|
3882
|
+
if (confirmedHashes.size > 0 && !fromIsSelf) {
|
|
3883
|
+
await this.sendRepairConfirmation(context.from, confirmedHashes);
|
|
3884
|
+
}
|
|
3128
3885
|
return;
|
|
3129
3886
|
}
|
|
3130
3887
|
const groupedByGid = await groupByGid(filteredHeads);
|
|
@@ -3222,7 +3979,12 @@ let SharedLog = (() => {
|
|
|
3222
3979
|
return;
|
|
3223
3980
|
}
|
|
3224
3981
|
if (toMerge.length > 0) {
|
|
3982
|
+
this.markEntriesKnownByPeer(toMerge.map((entry) => entry.hash), context.from.hashcode());
|
|
3225
3983
|
await this.log.join(toMerge);
|
|
3984
|
+
for (const merged of toMerge) {
|
|
3985
|
+
confirmedHashes.add(merged.hash);
|
|
3986
|
+
}
|
|
3987
|
+
await this.pruneJoinedEntriesNoLongerLed(toMerge);
|
|
3226
3988
|
toDelete?.map((x) =>
|
|
3227
3989
|
// TODO types
|
|
3228
3990
|
this.pruneDebouncedFnAddIfNotKeeping({
|
|
@@ -3261,6 +4023,10 @@ let SharedLog = (() => {
|
|
|
3261
4023
|
promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
|
|
3262
4024
|
}
|
|
3263
4025
|
await Promise.all(promises);
|
|
4026
|
+
if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
|
|
4027
|
+
this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
|
|
4028
|
+
await this.sendRepairConfirmation(context.from, confirmedHashes);
|
|
4029
|
+
}
|
|
3264
4030
|
}
|
|
3265
4031
|
}
|
|
3266
4032
|
else if (msg instanceof RequestIPrune) {
|
|
@@ -3268,6 +4034,7 @@ let SharedLog = (() => {
|
|
|
3268
4034
|
const from = context.from.hashcode();
|
|
3269
4035
|
for (const hash of msg.hashes) {
|
|
3270
4036
|
this.removePruneRequestSent(hash, from);
|
|
4037
|
+
this.removeEntriesKnownByPeer([hash], from);
|
|
3271
4038
|
// if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
|
|
3272
4039
|
// this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
|
|
3273
4040
|
const outGoingPrunes = this._requestIPruneResponseReplicatorSet.get(hash);
|
|
@@ -3355,6 +4122,11 @@ let SharedLog = (() => {
|
|
|
3355
4122
|
this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
|
|
3356
4123
|
}
|
|
3357
4124
|
}
|
|
4125
|
+
else if (msg instanceof ConfirmEntriesMessage) {
|
|
4126
|
+
this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
|
|
4127
|
+
this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
|
|
4128
|
+
return;
|
|
4129
|
+
}
|
|
3358
4130
|
else if (await this.syncronizer.onMessage(msg, context)) {
|
|
3359
4131
|
return; // the syncronizer has handled the message
|
|
3360
4132
|
}
|
|
@@ -3679,6 +4451,11 @@ let SharedLog = (() => {
|
|
|
3679
4451
|
if (options?.replicate) {
|
|
3680
4452
|
let messageToSend = undefined;
|
|
3681
4453
|
if (assumeSynced) {
|
|
4454
|
+
// `assumeSynced` is an explicit contract that this join should trust the
|
|
4455
|
+
// supplied history and avoid initiating outbound repair while the local
|
|
4456
|
+
// replication ranges settle.
|
|
4457
|
+
this._assumeSyncedRepairSuppressedUntil =
|
|
4458
|
+
Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
|
|
3682
4459
|
for (const entry of entriesToReplicate) {
|
|
3683
4460
|
await seedAssumeSyncedPeerHistory(entry);
|
|
3684
4461
|
}
|
|
@@ -3747,9 +4524,14 @@ let SharedLog = (() => {
|
|
|
3747
4524
|
clear();
|
|
3748
4525
|
// `waitForReplicator()` is typically used as a precondition before join/replicate
|
|
3749
4526
|
// flows. A replicator can become mature and enqueue a debounced rebalance
|
|
3750
|
-
// (`replicationChangeDebounceFn`) slightly later.
|
|
3751
|
-
//
|
|
3752
|
-
|
|
4527
|
+
// (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
|
|
4528
|
+
// make membership waits depend on all rebalance work finishing; callers that
|
|
4529
|
+
// need settled distribution already wait for that explicitly.
|
|
4530
|
+
this.replicationChangeDebounceFn?.flush?.().catch((error) => {
|
|
4531
|
+
if (!isNotStartedError(error)) {
|
|
4532
|
+
logger.error(error?.toString?.() ?? String(error));
|
|
4533
|
+
}
|
|
4534
|
+
});
|
|
3753
4535
|
deferred.resolve();
|
|
3754
4536
|
};
|
|
3755
4537
|
const reject = (error) => {
|
|
@@ -4141,11 +4923,51 @@ let SharedLog = (() => {
|
|
|
4141
4923
|
}
|
|
4142
4924
|
}
|
|
4143
4925
|
}
|
|
4926
|
+
if (!options?.candidates) {
|
|
4927
|
+
const fullReplicaLeaders = await this.findFullReplicaLeaders(cursors.length, roleAge, peerFilter);
|
|
4928
|
+
if (fullReplicaLeaders) {
|
|
4929
|
+
return fullReplicaLeaders;
|
|
4930
|
+
}
|
|
4931
|
+
}
|
|
4144
4932
|
return getSamples(cursors, this.replicationIndex, roleAge, this.indexableDomain.numbers, {
|
|
4145
4933
|
peerFilter,
|
|
4146
4934
|
uniqueReplicators: peerFilter,
|
|
4147
4935
|
});
|
|
4148
4936
|
}
|
|
4937
|
+
async findFullReplicaLeaders(replicas, roleAge, peerFilter) {
|
|
4938
|
+
const now = Date.now();
|
|
4939
|
+
const leaders = new Map();
|
|
4940
|
+
const includeStrict = this._logProperties?.strictFullReplicaFallback !== false;
|
|
4941
|
+
const iterator = this.replicationIndex.iterate({}, { shape: { hash: true, timestamp: true, mode: true } });
|
|
4942
|
+
try {
|
|
4943
|
+
for (;;) {
|
|
4944
|
+
const batch = await iterator.next(64);
|
|
4945
|
+
if (batch.length === 0) {
|
|
4946
|
+
break;
|
|
4947
|
+
}
|
|
4948
|
+
for (const result of batch) {
|
|
4949
|
+
const range = result.value;
|
|
4950
|
+
if (peerFilter && !peerFilter.has(range.hash)) {
|
|
4951
|
+
continue;
|
|
4952
|
+
}
|
|
4953
|
+
if (!isMatured(range, now, roleAge)) {
|
|
4954
|
+
continue;
|
|
4955
|
+
}
|
|
4956
|
+
if (range.mode === ReplicationIntent.Strict && !includeStrict) {
|
|
4957
|
+
continue;
|
|
4958
|
+
}
|
|
4959
|
+
leaders.set(range.hash, { intersecting: true });
|
|
4960
|
+
if (leaders.size > replicas) {
|
|
4961
|
+
return undefined;
|
|
4962
|
+
}
|
|
4963
|
+
}
|
|
4964
|
+
}
|
|
4965
|
+
}
|
|
4966
|
+
finally {
|
|
4967
|
+
await iterator.close();
|
|
4968
|
+
}
|
|
4969
|
+
return leaders.size > 0 ? leaders : undefined;
|
|
4970
|
+
}
|
|
4149
4971
|
async findLeadersFromEntry(entry, replicas, options) {
|
|
4150
4972
|
const coordinates = await this.createCoordinates(entry, replicas);
|
|
4151
4973
|
const result = await this._findLeaders(coordinates, options);
|
|
@@ -4613,13 +5435,25 @@ let SharedLog = (() => {
|
|
|
4613
5435
|
}
|
|
4614
5436
|
const changed = false;
|
|
4615
5437
|
const addedPeers = new Set();
|
|
5438
|
+
const authoritativeRepairPeers = new Set();
|
|
4616
5439
|
const warmupPeers = new Set();
|
|
5440
|
+
const churnRepairPeers = new Set();
|
|
4617
5441
|
const hasSelfWarmupChange = changes.some((change) => change.range.hash === selfHash &&
|
|
4618
5442
|
(change.type === "added" || change.type === "replaced"));
|
|
5443
|
+
const hasSelfRangeRemoval = changes.some((change) => change.range.hash === selfHash &&
|
|
5444
|
+
(change.type === "removed" || change.type === "replaced"));
|
|
4619
5445
|
for (const change of changes) {
|
|
5446
|
+
if (change.range.hash !== selfHash &&
|
|
5447
|
+
(change.type === "removed" || change.type === "replaced")) {
|
|
5448
|
+
this.removePeerFromEntryKnownPeers(change.range.hash);
|
|
5449
|
+
}
|
|
4620
5450
|
if (change.type === "added" || change.type === "replaced") {
|
|
4621
5451
|
const hash = change.range.hash;
|
|
4622
5452
|
if (hash !== selfHash) {
|
|
5453
|
+
// Existing peers can widen/shift ranges after the initial join. If we
|
|
5454
|
+
// only rescan on first-seen "added", late authoritative range updates can
|
|
5455
|
+
// leave historical backfill permanently partial under load.
|
|
5456
|
+
authoritativeRepairPeers.add(hash);
|
|
4623
5457
|
// Range updates can reassign entries to an existing peer shortly after it
|
|
4624
5458
|
// already received a subset. Avoid suppressing legitimate follow-up repair.
|
|
4625
5459
|
this._recentRepairDispatch.delete(hash);
|
|
@@ -4651,17 +5485,24 @@ let SharedLog = (() => {
|
|
|
4651
5485
|
return;
|
|
4652
5486
|
}
|
|
4653
5487
|
const isWarmupTarget = warmupPeers.has(target);
|
|
4654
|
-
const
|
|
5488
|
+
const mode = forceFreshDelivery
|
|
5489
|
+
? "churn"
|
|
5490
|
+
: isWarmupTarget
|
|
5491
|
+
? "join-warmup"
|
|
5492
|
+
: "join-authoritative";
|
|
4655
5493
|
this.dispatchMaybeMissingEntries(target, entries, {
|
|
4656
|
-
bypassRecentDedupe,
|
|
4657
|
-
|
|
5494
|
+
bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
|
|
5495
|
+
mode,
|
|
5496
|
+
retryScheduleMs: mode === "join-warmup"
|
|
4658
5497
|
? JOIN_WARMUP_RETRY_SCHEDULE_MS
|
|
4659
|
-
:
|
|
4660
|
-
|
|
5498
|
+
: mode === "join-authoritative"
|
|
5499
|
+
? [0]
|
|
5500
|
+
: undefined,
|
|
4661
5501
|
});
|
|
4662
5502
|
uncheckedDeliver.delete(target);
|
|
4663
5503
|
};
|
|
4664
5504
|
const queueUncheckedDeliver = (target, entry) => {
|
|
5505
|
+
churnRepairPeers.add(target);
|
|
4665
5506
|
let set = uncheckedDeliver.get(target);
|
|
4666
5507
|
if (!set) {
|
|
4667
5508
|
set = new Map();
|
|
@@ -4715,7 +5556,14 @@ let SharedLog = (() => {
|
|
|
4715
5556
|
}
|
|
4716
5557
|
}
|
|
4717
5558
|
}
|
|
4718
|
-
|
|
5559
|
+
for (const [peer] of currentPeers) {
|
|
5560
|
+
if (warmupPeers.has(peer)) {
|
|
5561
|
+
this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
|
|
5562
|
+
}
|
|
5563
|
+
}
|
|
5564
|
+
const authoritativePeers = [...currentPeers.keys()].filter((peer) => !warmupPeers.has(peer) &&
|
|
5565
|
+
!this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
|
|
5566
|
+
this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
|
|
4719
5567
|
if (!currentPeers.has(selfHash)) {
|
|
4720
5568
|
this.pruneDebouncedFnAddIfNotKeeping({
|
|
4721
5569
|
key: entryReplicated.hash,
|
|
@@ -4763,7 +5611,14 @@ let SharedLog = (() => {
|
|
|
4763
5611
|
}
|
|
4764
5612
|
}
|
|
4765
5613
|
}
|
|
4766
|
-
|
|
5614
|
+
for (const [peer] of currentPeers) {
|
|
5615
|
+
if (addedPeers.has(peer)) {
|
|
5616
|
+
this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
|
|
5617
|
+
}
|
|
5618
|
+
}
|
|
5619
|
+
const authoritativePeers = [...currentPeers.keys()].filter((peer) => !addedPeers.has(peer) &&
|
|
5620
|
+
!this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
|
|
5621
|
+
this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
|
|
4767
5622
|
if (!isLeader) {
|
|
4768
5623
|
this.pruneDebouncedFnAddIfNotKeeping({
|
|
4769
5624
|
key: entryReplicated.hash,
|
|
@@ -4780,9 +5635,17 @@ let SharedLog = (() => {
|
|
|
4780
5635
|
}
|
|
4781
5636
|
}
|
|
4782
5637
|
}
|
|
5638
|
+
if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
|
|
5639
|
+
await this.pruneIndexedEntriesNoLongerLed();
|
|
5640
|
+
}
|
|
4783
5641
|
if (forceFreshDelivery) {
|
|
4784
|
-
//
|
|
4785
|
-
|
|
5642
|
+
// Pure leave/shrink churn can have zero `addedPeers`, but the peers that
|
|
5643
|
+
// received redistributed entries still need a follow-up repair pass if the
|
|
5644
|
+
// immediate maybe-sync misses one entry.
|
|
5645
|
+
this.scheduleRepairSweep({
|
|
5646
|
+
mode: "churn",
|
|
5647
|
+
peers: churnRepairPeers,
|
|
5648
|
+
});
|
|
4786
5649
|
}
|
|
4787
5650
|
else if (useJoinWarmupFastPath) {
|
|
4788
5651
|
// Pure join warmup uses the cheap immediate maybe-missing dispatch above,
|
|
@@ -4795,19 +5658,22 @@ let SharedLog = (() => {
|
|
|
4795
5658
|
return;
|
|
4796
5659
|
}
|
|
4797
5660
|
this.scheduleRepairSweep({
|
|
4798
|
-
|
|
4799
|
-
|
|
5661
|
+
mode: "join-warmup",
|
|
5662
|
+
peers,
|
|
4800
5663
|
});
|
|
4801
5664
|
}, 250);
|
|
4802
5665
|
timer.unref?.();
|
|
4803
5666
|
this._repairRetryTimers.add(timer);
|
|
4804
5667
|
}
|
|
4805
|
-
else if (
|
|
5668
|
+
else if (authoritativeRepairPeers.size > 0) {
|
|
4806
5669
|
this.scheduleRepairSweep({
|
|
4807
|
-
|
|
4808
|
-
|
|
5670
|
+
mode: "join-authoritative",
|
|
5671
|
+
peers: authoritativeRepairPeers,
|
|
4809
5672
|
});
|
|
4810
5673
|
}
|
|
5674
|
+
if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
|
|
5675
|
+
this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
|
|
5676
|
+
}
|
|
4811
5677
|
for (const target of [...uncheckedDeliver.keys()]) {
|
|
4812
5678
|
flushUncheckedDeliverTarget(target);
|
|
4813
5679
|
}
|
|
@@ -4879,6 +5745,10 @@ let SharedLog = (() => {
|
|
|
4879
5745
|
if (!dynamicRange) {
|
|
4880
5746
|
return; // not allowed to replicate
|
|
4881
5747
|
}
|
|
5748
|
+
if (this.replicationController.maxMemoryLimit != null &&
|
|
5749
|
+
usedMemory > this.replicationController.maxMemoryLimit) {
|
|
5750
|
+
await this.pruneIndexedEntriesNoLongerLed();
|
|
5751
|
+
}
|
|
4882
5752
|
const peersSize = (await peers.getSize()) || 1;
|
|
4883
5753
|
const totalParticipation = await this.calculateTotalParticipation();
|
|
4884
5754
|
const newFactor = this.replicationController.step({
|