querysub 0.433.0 → 0.436.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.js +50 -50
- package/bin/deploy.js +0 -0
- package/bin/function.js +0 -0
- package/bin/server.js +0 -0
- package/costsBenefits.txt +115 -115
- package/deploy.ts +2 -2
- package/package.json +1 -1
- package/spec.txt +1192 -1192
- package/src/-a-archives/archives.ts +202 -202
- package/src/-a-archives/archivesBackBlaze.ts +1 -0
- package/src/-a-archives/archivesDisk.ts +454 -454
- package/src/-a-auth/certs.ts +540 -540
- package/src/-a-auth/node-forge-ed25519.d.ts +16 -16
- package/src/-b-authorities/dnsAuthority.ts +138 -138
- package/src/-c-identity/IdentityController.ts +258 -258
- package/src/-d-trust/NetworkTrust2.ts +180 -180
- package/src/-e-certs/EdgeCertController.ts +252 -252
- package/src/-e-certs/certAuthority.ts +201 -201
- package/src/-f-node-discovery/NodeDiscovery.ts +640 -640
- package/src/-g-core-values/NodeCapabilities.ts +200 -200
- package/src/-h-path-value-serialize/stringSerializer.ts +175 -175
- package/src/0-path-value-core/PathValueCommitter.ts +468 -468
- package/src/0-path-value-core/pathValueCore.ts +2 -2
- package/src/2-proxy/PathValueProxyWatcher.ts +2542 -2542
- package/src/2-proxy/TransactionDelayer.ts +94 -94
- package/src/2-proxy/pathDatabaseProxyBase.ts +36 -36
- package/src/2-proxy/pathValueProxy.ts +159 -159
- package/src/3-path-functions/PathFunctionRunnerMain.ts +87 -87
- package/src/3-path-functions/pathFunctionLoader.ts +516 -516
- package/src/3-path-functions/tests/rejectTest.ts +76 -76
- package/src/4-deploy/deployCheck.ts +6 -6
- package/src/4-dom/css.tsx +29 -29
- package/src/4-dom/cssTypes.d.ts +211 -211
- package/src/4-dom/qreact.tsx +2799 -2799
- package/src/4-dom/qreactTest.tsx +410 -410
- package/src/4-querysub/permissions.ts +335 -335
- package/src/4-querysub/querysubPrediction.ts +483 -483
- package/src/5-diagnostics/qreactDebug.tsx +346 -346
- package/src/TestController.ts +34 -34
- package/src/bits.ts +104 -104
- package/src/buffers.ts +69 -69
- package/src/diagnostics/ActionsHistory.ts +57 -57
- package/src/diagnostics/listenOnDebugger.ts +71 -71
- package/src/diagnostics/periodic.ts +111 -111
- package/src/diagnostics/trackResources.ts +91 -91
- package/src/diagnostics/watchdog.ts +120 -120
- package/src/errors.ts +133 -133
- package/src/forceProduction.ts +2 -2
- package/src/fs.ts +80 -80
- package/src/functional/diff.ts +857 -857
- package/src/functional/promiseCache.ts +78 -78
- package/src/functional/random.ts +8 -8
- package/src/functional/stats.ts +60 -60
- package/src/heapDumps.ts +665 -665
- package/src/https.ts +1 -1
- package/src/library-components/AspectSizedComponent.tsx +87 -87
- package/src/library-components/ButtonSelector.tsx +64 -64
- package/src/library-components/DropdownCustom.tsx +150 -150
- package/src/library-components/DropdownSelector.tsx +31 -31
- package/src/library-components/InlinePopup.tsx +66 -66
- package/src/misc/color.ts +29 -29
- package/src/misc/hash.ts +83 -83
- package/src/misc/ipPong.js +13 -13
- package/src/misc/networking.ts +1 -1
- package/src/misc/random.ts +44 -44
- package/src/misc.ts +196 -196
- package/src/path.ts +255 -255
- package/src/persistentLocalStore.ts +41 -41
- package/src/promise.ts +14 -14
- package/src/storage/fileSystemPointer.ts +71 -71
- package/src/test/heapProcess.ts +35 -35
- package/src/zip.ts +15 -15
- package/tsconfig.json +26 -26
- package/yarnSpec.txt +56 -56
|
@@ -1,641 +1,641 @@
|
|
|
1
|
-
import { SocketFunction } from "socket-function/SocketFunction";
|
|
2
|
-
import { getArchives } from "../-a-archives/archives";
|
|
3
|
-
import { getDomain, isDevDebugbreak, isNoNetwork, isPublic } from "../config";
|
|
4
|
-
import { measureBlock } from "socket-function/src/profiling/measure";
|
|
5
|
-
import { isNode, keyByArray, sha256Hash, throttleFunction, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
6
|
-
import { errorToUndefinedSilent, ignoreErrors, logErrors, timeoutToError, timeoutToUndefinedSilent } from "../errors";
|
|
7
|
-
import { ensureWeAreTrusted, requiresNetworkTrustHook } from "../-d-trust/NetworkTrust2";
|
|
8
|
-
import { delay, runInfinitePoll, runInfinitePollCallAtStart } from "socket-function/src/batching";
|
|
9
|
-
import { getCallFactory, getCreateCallFactory, getNodeId, getNodeIdFromLocation, getNodeIdLocation } from "socket-function/src/nodeCache";
|
|
10
|
-
import { cache, lazy } from "socket-function/src/caching";
|
|
11
|
-
import { shuffle } from "../misc/random";
|
|
12
|
-
import { blue, green, magenta, red, yellow } from "socket-function/src/formatting/logColors";
|
|
13
|
-
import { PromiseObj } from "../promise";
|
|
14
|
-
import { formatDateTime } from "socket-function/src/formatting/format";
|
|
15
|
-
import { isClient, isServer } from "../config2";
|
|
16
|
-
import { waitForFirstTimeSync } from "socket-function/time/trueTimeShim";
|
|
17
|
-
import { decodeNodeId, decodeNodeIdAssert } from "../-a-auth/certs";
|
|
18
|
-
|
|
19
|
-
import { isDefined } from "../misc";
|
|
20
|
-
import { getBootedEdgeNode } from "../-0-hooks/hooks";
|
|
21
|
-
import { EdgeNodeConfig } from "../4-deploy/edgeNodes";
|
|
22
|
-
import * as certs from "../-a-auth/certs";
|
|
23
|
-
import { logDisk } from "../diagnostics/logs/diskLogger";
|
|
24
|
-
import { MaybePromise } from "socket-function/src/types";
|
|
25
|
-
import { getPathStr2 } from "../path";
|
|
26
|
-
|
|
27
|
-
let HEARTBEAT_INTERVAL = timeInMinute * 15;
|
|
28
|
-
// Interval which we check other heartbeats
|
|
29
|
-
let CHECK_INTERVAL = HEARTBEAT_INTERVAL;
|
|
30
|
-
// If the heartbeat is older than thing, it fails the dead check
|
|
31
|
-
let DEAD_THRESHOLD = HEARTBEAT_INTERVAL * 2;
|
|
32
|
-
// If we find another node is dead this number of checks, we remove it from storage, and tell
|
|
33
|
-
// all other nodes to remove from their memory.
|
|
34
|
-
let DEAD_CHECK_COUNT = 4;
|
|
35
|
-
// If we find we are unable to write our heartbeat, we have to kill our own process. Otherwise it may
|
|
36
|
-
// be too out of sync, and might commit unverified data to the disk.
|
|
37
|
-
let SUICIDE_HEARTBEAT_THRESHOLD = HEARTBEAT_INTERVAL * 4;
|
|
38
|
-
|
|
39
|
-
let CLIENTSIDE_POLL_RATE = timeInMinute * 30;
|
|
40
|
-
|
|
41
|
-
// We can't poll backblaze too often. One a minute starts to cost around 10 cents per month.
|
|
42
|
-
// So once a second would cost 6 USD per month, per service... which starts to get expensive.
|
|
43
|
-
let DISK_AUDIT_RATE = timeInMinute * 15;
|
|
44
|
-
// We CAN poll our API frequently. The overhead for calls should be far less than 1ms, and
|
|
45
|
-
// the bandwidth should also be effectively nothing. Maybe 2.5GB per month if we send a
|
|
46
|
-
// request per second, and each request is 1000 bytes (which as we use websockets, it
|
|
47
|
-
// probably is less than that). Which is around 2.5 cents on digital ocean IF we go over
|
|
48
|
-
// our 1TB/month allowance.
|
|
49
|
-
let API_AUDIT_RATE = timeInSecond * 30;
|
|
50
|
-
let API_AUDIT_COUNT = 12;
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
let DEAD_NODE_POLL_COOLDOWN = timeInMinute * 5;
|
|
54
|
-
|
|
55
|
-
let shutdown = false;
|
|
56
|
-
|
|
57
|
-
const archives = lazy(() => getArchives("nodes/"));
|
|
58
|
-
|
|
59
|
-
let logging = true;
|
|
60
|
-
export function enableNodeDiscoveryLogging() {
|
|
61
|
-
logging = true;
|
|
62
|
-
}
|
|
63
|
-
export function isNodeDiscoveryLogging() {
|
|
64
|
-
return logging;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
function getAlternateNodeIds(nodeId: string): MaybePromise<string[] | undefined> {
|
|
69
|
-
let machineId = certs.getMachineId(nodeId);
|
|
70
|
-
if (machineId === getOwnMachineId()) {
|
|
71
|
-
let decoded = decodeNodeId(nodeId);
|
|
72
|
-
if (decoded) {
|
|
73
|
-
return [
|
|
74
|
-
"127-0-0-1." + decoded.domain + ":" + decoded.port
|
|
75
|
-
];
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
return undefined;
|
|
79
|
-
}
|
|
80
|
-
SocketFunction.GET_ALTERNATE_NODE_IDS = getAlternateNodeIds;
|
|
81
|
-
|
|
82
|
-
export const getOurNodeId = getOwnNodeId;
|
|
83
|
-
export const getOurNodeIdAssert = getOwnNodeIdAssert;
|
|
84
|
-
|
|
85
|
-
export const SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE = "SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE";
|
|
86
|
-
export function getOwnNodeId(): string {
|
|
87
|
-
let nodeId = SocketFunction.mountedNodeId;
|
|
88
|
-
if (!nodeId) {
|
|
89
|
-
return SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE;
|
|
90
|
-
}
|
|
91
|
-
return nodeId;
|
|
92
|
-
}
|
|
93
|
-
export function getMountNodeId(): string | undefined | "" {
|
|
94
|
-
return SocketFunction.mountedNodeId;
|
|
95
|
-
}
|
|
96
|
-
export function getOwnNodeIdAssert(): string {
|
|
97
|
-
let nodeId = SocketFunction.mountedNodeId;
|
|
98
|
-
if (!nodeId) {
|
|
99
|
-
throw new Error(`Node must be mounted before nodeId is accessed`);
|
|
100
|
-
}
|
|
101
|
-
return nodeId;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
export const getOwnThreadId = lazy(() => {
|
|
105
|
-
return certs.getOwnThreadId();
|
|
106
|
-
});
|
|
107
|
-
export const getOwnMachineId = lazy(() => {
|
|
108
|
-
return certs.getOwnMachineId();
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
export function isOwnNodeId(nodeId: string): boolean {
|
|
112
|
-
if (
|
|
113
|
-
nodeId === getOwnNodeId()
|
|
114
|
-
|| nodeId === SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE
|
|
115
|
-
) {
|
|
116
|
-
return true;
|
|
117
|
-
}
|
|
118
|
-
// If it's 127.0.0.1, and on the same port as us, it is us.
|
|
119
|
-
let obj = decodeNodeId(nodeId);
|
|
120
|
-
if (obj && obj.domain === "127-0-0-1." + getDomain() && obj.port === decodeNodeId(getOwnNodeId())?.port) {
|
|
121
|
-
return true;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
return false;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
export function isNodeIdOnOwnMachineId(nodeId: string): boolean {
|
|
128
|
-
return certs.getMachineId(nodeId) === getOwnMachineId() || decodeNodeId(nodeId)?.machineId === "127-0-0-1";
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
export function areNodeIdsEqual(lhs: string, rhs: string): boolean {
|
|
132
|
-
if (lhs === rhs) return true;
|
|
133
|
-
return isNodeIdOnOwnMachineId(lhs) && isNodeIdOnOwnMachineId(rhs) && getNodeIdLocation(rhs)?.port === getNodeIdLocation(lhs)?.port;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
let nodeOverrides: string[] | undefined;
|
|
137
|
-
let beforeGetNodeAllId = async () => { };
|
|
138
|
-
export async function getAllNodeIds() {
|
|
139
|
-
await beforeGetNodeAllId();
|
|
140
|
-
|
|
141
|
-
// NOTE: We always wait for the time sync before returning any node ids. Because if we try to talk
|
|
142
|
-
// to remote nodes with a massively out of sync time, they will likely reject our messages!
|
|
143
|
-
let promise = waitForFirstTimeSync();
|
|
144
|
-
if (promise) {
|
|
145
|
-
await measureBlock(async () => {
|
|
146
|
-
await promise;
|
|
147
|
-
}, "waitForFirstTimeSync");
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
if (nodeOverrides) {
|
|
151
|
-
return nodeOverrides;
|
|
152
|
-
}
|
|
153
|
-
return Array.from(allNodeIds2);
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
export async function syncNodesNow() {
|
|
157
|
-
await syncArchives();
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
/** NOTE: You should try to use watchDeltaNodeIds instead of this. */
|
|
161
|
-
export function watchNodeIds(callback: (nodeIds: string[]) => void) {
|
|
162
|
-
function call() {
|
|
163
|
-
callback(Array.from(allNodeIds2));
|
|
164
|
-
}
|
|
165
|
-
nodeIdWatchers.push(call);
|
|
166
|
-
call();
|
|
167
|
-
}
|
|
168
|
-
export function watchDeltaNodeIds(callback: (config: { newNodeIds: string[]; removedNodeIds: string[] }) => void) {
|
|
169
|
-
let prevNodeIds: string[] = [];
|
|
170
|
-
watchNodeIds(nodeIds => {
|
|
171
|
-
let newNodeIds = nodeIds.filter(nodeId => !prevNodeIds.includes(nodeId));
|
|
172
|
-
let removedNodeIds = prevNodeIds.filter(nodeId => !nodeIds.includes(nodeId));
|
|
173
|
-
if (newNodeIds.length > 0 || removedNodeIds.length > 0) {
|
|
174
|
-
callback({ newNodeIds, removedNodeIds });
|
|
175
|
-
}
|
|
176
|
-
prevNodeIds = nodeIds;
|
|
177
|
-
});
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
let allNodeIds2 = new Set<string>();
|
|
181
|
-
function getAllNodesHash() {
|
|
182
|
-
// Always include ourselves in the hash
|
|
183
|
-
let sortedNodes = Array.from(allNodeIds2);
|
|
184
|
-
sortedNodes.sort();
|
|
185
|
-
return sha256Hash(JSON.stringify(sortedNodes));
|
|
186
|
-
}
|
|
187
|
-
function addNodeId(nodeId: string) {
|
|
188
|
-
addNodeIdBase(nodeId);
|
|
189
|
-
}
|
|
190
|
-
function addNodeIdBase(nodeId: string) {
|
|
191
|
-
if (allNodeIds2.has(nodeId)) return;
|
|
192
|
-
if (logging) {
|
|
193
|
-
console.log(blue(`Discovered node ${nodeId}`));
|
|
194
|
-
}
|
|
195
|
-
allNodeIds2.add(nodeId);
|
|
196
|
-
onNodesChanged();
|
|
197
|
-
}
|
|
198
|
-
async function setNodeIds(nodeIds: string[]) {
|
|
199
|
-
nodeIds = nodeIds.filter(x => x !== SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE);
|
|
200
|
-
|
|
201
|
-
console.info("setNodeIds", { nodeIds });
|
|
202
|
-
let newNodeIds = nodeIds.filter(nodeId => !allNodeIds2.has(nodeId));
|
|
203
|
-
let newIds = new Set(nodeIds);
|
|
204
|
-
let removedNodeIds = Array.from(allNodeIds2).filter(nodeId => !newIds.has(nodeId));
|
|
205
|
-
if (newNodeIds.length === 0 && removedNodeIds.length === 0) return;
|
|
206
|
-
if (logging) {
|
|
207
|
-
for (let nodeId of newNodeIds) {
|
|
208
|
-
console.log(blue(`Discovered node ${nodeId}`));
|
|
209
|
-
}
|
|
210
|
-
for (let nodeId of removedNodeIds) {
|
|
211
|
-
console.log(red(`Removed node from setNodeIds ${nodeId}`));
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
allNodeIds2 = new Set(nodeIds);
|
|
215
|
-
onNodesChanged();
|
|
216
|
-
}
|
|
217
|
-
let nodeIdWatchers: (() => void)[] = [];
|
|
218
|
-
function onNodesChanged() {
|
|
219
|
-
for (let watcher of nodeIdWatchers) {
|
|
220
|
-
try {
|
|
221
|
-
watcher();
|
|
222
|
-
} catch (e: any) {
|
|
223
|
-
console.log(red(`Ignoring error from callback ${e.stack}`));
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
let rootDiscoveryNodeId = "";
|
|
229
|
-
export function configRootDiscoveryLocation(config: {
|
|
230
|
-
domain: string;
|
|
231
|
-
port: number;
|
|
232
|
-
}) {
|
|
233
|
-
rootDiscoveryNodeId = getNodeId(config.domain, config.port);
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
export function updateRootDiscoveryLocation(edgeNode: EdgeNodeConfig) {
|
|
237
|
-
let prevRootId = getBrowserUrlNode();
|
|
238
|
-
if (!isClient()) {
|
|
239
|
-
throw new Error(`updateRootDiscoveryLocation should only be called in the browser. If not a client, you should be able to discovery new nodes automatically`);
|
|
240
|
-
}
|
|
241
|
-
let nodeId = edgeNode.host;
|
|
242
|
-
let nodeCache = Object.entries(require.cache);
|
|
243
|
-
for (let [path, module] of nodeCache) {
|
|
244
|
-
if (!module) continue;
|
|
245
|
-
if (!path.includes(prevRootId)) continue;
|
|
246
|
-
let newPath = path.replace(prevRootId, nodeId);
|
|
247
|
-
// Remove the old module
|
|
248
|
-
delete require.cache[path];
|
|
249
|
-
// Add the new module
|
|
250
|
-
require.cache[newPath] = module;
|
|
251
|
-
module.filename = module.filename.replace(prevRootId, nodeId);
|
|
252
|
-
module.id = module.id.replace(prevRootId, nodeId);
|
|
253
|
-
if (module.original) {
|
|
254
|
-
module.original.filename = module.original.filename.replace(prevRootId, nodeId);
|
|
255
|
-
module.original.originalId = module.original.originalId.replace(prevRootId, nodeId);
|
|
256
|
-
for (let [key, value] of Object.entries(module.original.requests)) {
|
|
257
|
-
if (key.includes(prevRootId)) {
|
|
258
|
-
let newKey = key.replace(prevRootId, nodeId);
|
|
259
|
-
delete module.original.requests[key];
|
|
260
|
-
module.original.requests[newKey] = value;
|
|
261
|
-
key = newKey;
|
|
262
|
-
}
|
|
263
|
-
if (value.includes(prevRootId)) {
|
|
264
|
-
value = value.replace(prevRootId, nodeId);
|
|
265
|
-
module.original.requests[key] = value;
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
globalThis.BOOTED_EDGE_NODE = edgeNode;
|
|
271
|
-
rootDiscoveryNodeId = nodeId;
|
|
272
|
-
addNodeId(nodeId);
|
|
273
|
-
}
|
|
274
|
-
(globalThis as any).updateRootDiscoveryLocation = updateRootDiscoveryLocation;
|
|
275
|
-
|
|
276
|
-
/** NOTE: Can also be called serverside, if configRootDiscoveryLocation is called (otherwise can always be called clientside). */
|
|
277
|
-
export function getBrowserUrlNode() {
|
|
278
|
-
if (!isClient()) throw new Error(`getBrowserUrlNode can only be called when isClient()`);
|
|
279
|
-
let rootLocation = rootDiscoveryNodeId;
|
|
280
|
-
if (!rootLocation && !isNode()) {
|
|
281
|
-
rootLocation = getNodeIdFromLocation();
|
|
282
|
-
}
|
|
283
|
-
if (!rootLocation) {
|
|
284
|
-
throw new Error(`configRootDiscoveryLocation wasn't called, and ---client appears to have been used (isNode() and isClient()). Call configRootDiscoveryLocation with the domain and port of the server you wish to use (ex, querysub.com:1111)`);
|
|
285
|
-
}
|
|
286
|
-
return rootLocation;
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
export async function triggerNodeChange() {
|
|
290
|
-
await syncNodesNow();
|
|
291
|
-
await Promise.allSettled(Array.from(allNodeIds2).map(async nodeId => {
|
|
292
|
-
if (isOwnNodeId(nodeId)) return;
|
|
293
|
-
await timeoutToUndefinedSilent(timeInSecond * 5, errorToUndefinedSilent(NodeDiscoveryController.nodes[nodeId].addNode(getOwnNodeId())));
|
|
294
|
-
}));
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
async function clearDeadThreadsFromArchives() {
|
|
299
|
-
let nodes = await archives().find("");
|
|
300
|
-
|
|
301
|
-
function getPortHash(nodeId: string) {
|
|
302
|
-
let obj = decodeNodeId(nodeId);
|
|
303
|
-
if (!obj) return undefined;
|
|
304
|
-
return getPathStr2(obj.machineId, obj.port + "");
|
|
305
|
-
}
|
|
306
|
-
let byPortHash = keyByArray(nodes, getPortHash);
|
|
307
|
-
for (let [portHash, nodeIds] of Array.from(byPortHash)) {
|
|
308
|
-
if (!portHash) continue;
|
|
309
|
-
let aliveNodeId = "";
|
|
310
|
-
await Promise.all(nodeIds.map(async nodeId => {
|
|
311
|
-
let alive = await errorToUndefinedSilent(NodeDiscoveryController.nodes[nodeId].isAlive());
|
|
312
|
-
if (alive) {
|
|
313
|
-
aliveNodeId = nodeId;
|
|
314
|
-
}
|
|
315
|
-
}));
|
|
316
|
-
if (aliveNodeId) {
|
|
317
|
-
let deadThreads = nodeIds.filter(nodeId => nodeId !== aliveNodeId);
|
|
318
|
-
await Promise.all(deadThreads.map(async deadNodeId => {
|
|
319
|
-
console.log(`Removing dead thread. We contacted a node on the same port and same machine (${aliveNodeId}), which means the port has been reused by another thread, which proves that the old thread has died, as otherwise the new thread would not be able to use it.`);
|
|
320
|
-
await archives().del(deadNodeId);
|
|
321
|
-
}));
|
|
322
|
-
}
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
return nodes;
|
|
326
|
-
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
async function syncArchives() {
|
|
330
|
-
if (isServer()) {
|
|
331
|
-
// Make sure we are present
|
|
332
|
-
await writeHeartbeat();
|
|
333
|
-
let nodeIds = await archives().find("");
|
|
334
|
-
console.info(`Synced node ids from archives`, { nodeIds });
|
|
335
|
-
await setNodeIds(nodeIds);
|
|
336
|
-
} else {
|
|
337
|
-
if (isNoNetwork() || !isNode()) {
|
|
338
|
-
// NOTE: If no network, our trust source might be different, so we can't talk to regular nodes,
|
|
339
|
-
// and instead have to only talk to HTTP nodes
|
|
340
|
-
await setNodeIds([getBrowserUrlNode()]);
|
|
341
|
-
} else {
|
|
342
|
-
// If on the network, NetworkTrust2 should sync the trusted machines from backblaze, so we should be
|
|
343
|
-
// able to talk to any nodes.
|
|
344
|
-
// - If they user is using --client they only want to talk to querysub nodes. There might be multiple,
|
|
345
|
-
// which cloudflare will proxy, HOWEVER, it is more efficient to directly access the node list, which
|
|
346
|
-
// will be better for load balancing and updating on failure than the cloudflare proxying... probably.
|
|
347
|
-
await setNodeIds(await NodeDiscoveryController.nodes[getBrowserUrlNode()].getAllNodeIds());
|
|
348
|
-
}
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
async function runHeartbeatAuditLoop() {
|
|
353
|
-
await getAllNodeIds();
|
|
354
|
-
let deadCount = new Map<string, number>();
|
|
355
|
-
// 90% of the normal interval, so we don't run at the same time as the other audit
|
|
356
|
-
await runInfinitePollCallAtStart(CHECK_INTERVAL * 0.9, async () => {
|
|
357
|
-
if (shutdown) return;
|
|
358
|
-
// Wait a bit longer, to try to prevent all nodes from synchronizing their audit times.
|
|
359
|
-
console.log(magenta(`Auditing node list`));
|
|
360
|
-
await delay(CHECK_INTERVAL * Math.random() * 0.1);
|
|
361
|
-
//console.log(magenta(`Auditing node list`));
|
|
362
|
-
|
|
363
|
-
let deadTime = Date.now() - DEAD_THRESHOLD;
|
|
364
|
-
|
|
365
|
-
let nodeIds = await clearDeadThreadsFromArchives();
|
|
366
|
-
// We spent the money checking the node list, so we might as well update it
|
|
367
|
-
await setNodeIds(nodeIds);
|
|
368
|
-
|
|
369
|
-
let pendingDeadCount = 0;
|
|
370
|
-
|
|
371
|
-
let removedNodeIds: string[] = [];
|
|
372
|
-
for (let nodeId of nodeIds) {
|
|
373
|
-
let lastTime = Number((await archives().get(nodeId))?.toString()) || 0;
|
|
374
|
-
if (lastTime < deadTime) {
|
|
375
|
-
// Increment the dead count
|
|
376
|
-
let count = deadCount.get(nodeId) || 0;
|
|
377
|
-
count++;
|
|
378
|
-
deadCount.set(nodeId, count);
|
|
379
|
-
if (count >= DEAD_CHECK_COUNT) {
|
|
380
|
-
removedNodeIds.push(nodeId);
|
|
381
|
-
console.log(yellow(`Node ${nodeId} was found to be dead, removing from node list. Last heartbeat at ${formatDateTime(lastTime)}, dead threshold at ${formatDateTime(deadTime)}`));
|
|
382
|
-
await archives().del(nodeId);
|
|
383
|
-
deadCount.delete(nodeId);
|
|
384
|
-
} else {
|
|
385
|
-
console.log(yellow(`Node ${nodeId} was found to be dead, last heartbeat at ${formatDateTime(lastTime)} < dead threshold at ${formatDateTime(deadTime)}, dead count ${count}/${DEAD_CHECK_COUNT}. Total nodes seen ${nodeIds.length}`));
|
|
386
|
-
pendingDeadCount++;
|
|
387
|
-
}
|
|
388
|
-
} else {
|
|
389
|
-
deadCount.delete(nodeId);
|
|
390
|
-
console.info("Read node heartbeat", { nodeId, lastTime });
|
|
391
|
-
}
|
|
392
|
-
}
|
|
393
|
-
if (pendingDeadCount) {
|
|
394
|
-
console.log(blue(`Pending dead nodes ${pendingDeadCount}/${nodeIds.length}`));
|
|
395
|
-
}
|
|
396
|
-
|
|
397
|
-
if (removedNodeIds.length > 0) {
|
|
398
|
-
console.log(blue(`Removed ${removedNodeIds.length}/${nodeIds.length} nodes from node list`), { removedNodeIds });
|
|
399
|
-
await syncArchives();
|
|
400
|
-
await tellEveryoneNodesChanges(`removedNodeIds ${removedNodeIds.join("|")}`);
|
|
401
|
-
}
|
|
402
|
-
});
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
// NOTE: Not just dead nodes, but also nodes that are unreachable, such as developer nodes.
|
|
406
|
-
let deadNodes = new Map<string, number>();
|
|
407
|
-
async function fastMemorySync() {
|
|
408
|
-
let aliveNodes = new Set(Array.from(allNodeIds2).filter(x => !isOwnNodeId(x)));
|
|
409
|
-
let deadThreshold = Date.now() - DEAD_NODE_POLL_COOLDOWN;
|
|
410
|
-
for (let [nodeId, time] of Array.from(deadNodes)) {
|
|
411
|
-
if (time < deadThreshold) {
|
|
412
|
-
deadNodes.delete(nodeId);
|
|
413
|
-
} else {
|
|
414
|
-
aliveNodes.delete(nodeId);
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
let checkNodes = shuffle(Array.from(aliveNodes), Date.now()).slice(0, API_AUDIT_COUNT);
|
|
418
|
-
let otherNodesAll = await Promise.all(
|
|
419
|
-
checkNodes.map(async nodeId => {
|
|
420
|
-
let nodes = await timeoutToUndefinedSilent(200, NodeDiscoveryController.nodes[nodeId].getAllNodeIds());
|
|
421
|
-
if (!nodes) {
|
|
422
|
-
deadNodes.set(nodeId, Date.now());
|
|
423
|
-
}
|
|
424
|
-
return nodes || [];
|
|
425
|
-
})
|
|
426
|
-
);
|
|
427
|
-
let otherNodes = Array.from(new Set(otherNodesAll.flat()));
|
|
428
|
-
// This would log WAY too much, because we poll a lot, because we want to minimize downtime
|
|
429
|
-
//console.log(magenta(`Fast memory sync at ${formatVeryNiceDateTime(Date.now())}, nodes found ${otherNodes.length}`), getDebuggerUrl());
|
|
430
|
-
|
|
431
|
-
// If they are missing nodes that's fine. We constantly have extra nodes, and have to function correctly
|
|
432
|
-
// with extra nodes. However, if we are missing nodes, we'd prefer to have them quickly, so we should
|
|
433
|
-
// sync now.
|
|
434
|
-
let missingNodes = otherNodes.filter(nodeId => !allNodeIds2.has(nodeId));
|
|
435
|
-
if (missingNodes.length > 0) {
|
|
436
|
-
console.log(yellow(`Node list is missing nodes, resyncing node`), { missingNodes, otherNodes });
|
|
437
|
-
await syncArchives();
|
|
438
|
-
}
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
async function runMemoryAuditLoop() {
|
|
442
|
-
await getAllNodeIds();
|
|
443
|
-
runInfinitePoll(DISK_AUDIT_RATE, syncArchives);
|
|
444
|
-
runInfinitePoll(API_AUDIT_RATE, fastMemorySync);
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
async function writeHeartbeat() {
|
|
448
|
-
if (shutdown) return;
|
|
449
|
-
let now = Date.now();
|
|
450
|
-
let nodeId = getMountNodeId();
|
|
451
|
-
console.log(green(`Writing heartbeat ${formatDateTime(now)} for self (${nodeId})`));
|
|
452
|
-
if (!nodeId) return;
|
|
453
|
-
await archives().set(nodeId, Buffer.from(now + ""));
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
async function runMainSyncLoops() {
|
|
457
|
-
await syncArchives();
|
|
458
|
-
|
|
459
|
-
discoveryReady.resolve();
|
|
460
|
-
|
|
461
|
-
// We can't heartbeat until we mount
|
|
462
|
-
await SocketFunction.mountPromise;
|
|
463
|
-
|
|
464
|
-
// We have to write before we call NodeDiscoveryController.addNode
|
|
465
|
-
await writeHeartbeat();
|
|
466
|
-
await syncArchives();
|
|
467
|
-
let selfNodeId = SocketFunction.mountedNodeId;
|
|
468
|
-
if (!allNodeIds2.has(selfNodeId)) {
|
|
469
|
-
throw new Error(`Failed sanity check, our node id didn't appear in archives after we just wrote it? Missing ${SocketFunction.mountedNodeId}`);
|
|
470
|
-
}
|
|
471
|
-
// NOTE: Our first broadcast is special
|
|
472
|
-
await Promise.allSettled(Array.from(allNodeIds2).map(async nodeId => {
|
|
473
|
-
if (isOwnNodeId(nodeId)) return;
|
|
474
|
-
// Ignore errors, but wait a bit, so hopefully 99.99% of the time we can be certain
|
|
475
|
-
// all other nodes know our node id at this point.
|
|
476
|
-
await timeoutToUndefinedSilent(timeInSecond * 5, errorToUndefinedSilent(NodeDiscoveryController.nodes[nodeId].addNode(getOwnNodeId())));
|
|
477
|
-
}));
|
|
478
|
-
|
|
479
|
-
nodeBroadcasted.resolve();
|
|
480
|
-
|
|
481
|
-
console.log(magenta(`Node discovery is loaded`));
|
|
482
|
-
|
|
483
|
-
await runInfinitePollCallAtStart(HEARTBEAT_INTERVAL, async function nodeDiscoverHeartbeat() {
|
|
484
|
-
// If we waited too long, other nodes might think we are dead. In which case, we SHOULD terminate.
|
|
485
|
-
if (!isNoNetwork()) {
|
|
486
|
-
// FIRST, verify we didn't delay too long (to make sure we kill any nodes that were disconnected
|
|
487
|
-
// from the internet for too long)
|
|
488
|
-
let lastTime = Number((await archives().get(selfNodeId))?.toString());
|
|
489
|
-
let suicideThreshold = Date.now() - SUICIDE_HEARTBEAT_THRESHOLD;
|
|
490
|
-
if (!lastTime || lastTime < suicideThreshold) {
|
|
491
|
-
if (!lastTime) {
|
|
492
|
-
console.error(red(`Self node was removed due to not heartbeating. Terminating self process, as it likely has very stale data.`));
|
|
493
|
-
} else {
|
|
494
|
-
console.error(red(`Self node was has very old heartbeat. Terminating self process, as it likely has very stale data.`));
|
|
495
|
-
}
|
|
496
|
-
process.exit();
|
|
497
|
-
}
|
|
498
|
-
} else {
|
|
499
|
-
// We DO have to re-broadcast, otherwise no one will know we exist
|
|
500
|
-
for (let nodeId of allNodeIds2) {
|
|
501
|
-
if (isOwnNodeId(nodeId)) continue;
|
|
502
|
-
// Ignore errors, as nodes die often, and we only want to log once - when we actually remove the node id.
|
|
503
|
-
ignoreErrors(NodeDiscoveryController.nodes[nodeId].addNode(getOwnNodeId()));
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
await writeHeartbeat();
|
|
508
|
-
});
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
let discoveryReady = new PromiseObj<void>();
|
|
512
|
-
let nodeBroadcasted = new PromiseObj<void>();
|
|
513
|
-
beforeGetNodeAllId = async () => {
|
|
514
|
-
await discoveryReady.promise;
|
|
515
|
-
};
|
|
516
|
-
export async function onNodeDiscoveryReady() {
|
|
517
|
-
await getAllNodeIds();
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
export async function onNodeBroadcasted() {
|
|
521
|
-
await nodeBroadcasted.promise;
|
|
522
|
-
}
|
|
523
|
-
if (isServer()) {
|
|
524
|
-
setImmediate(async () => {
|
|
525
|
-
|
|
526
|
-
logErrors(runHeartbeatAuditLoop());
|
|
527
|
-
logErrors(runMemoryAuditLoop());
|
|
528
|
-
// NOTE: We used to wait until we mounted, but... we should be able to find nodes
|
|
529
|
-
// before we mount, right? (And what if we never mount?)
|
|
530
|
-
runMainSyncLoops().catch(e => {
|
|
531
|
-
discoveryReady.reject(e);
|
|
532
|
-
logErrors(Promise.reject(e));
|
|
533
|
-
});
|
|
534
|
-
});
|
|
535
|
-
} else {
|
|
536
|
-
|
|
537
|
-
if (isNode()) {
|
|
538
|
-
discoveryReady.resolve();
|
|
539
|
-
nodeBroadcasted.resolve();
|
|
540
|
-
// Just get the archives, syncing again if we haven't synced in a while
|
|
541
|
-
let lastGetTime = 0;
|
|
542
|
-
beforeGetNodeAllId = async () => {
|
|
543
|
-
let lastGetThreshold = lastGetTime + CLIENTSIDE_POLL_RATE;
|
|
544
|
-
if (Date.now() > lastGetThreshold) {
|
|
545
|
-
lastGetTime = Date.now();
|
|
546
|
-
await syncArchives();
|
|
547
|
-
}
|
|
548
|
-
};
|
|
549
|
-
} else {
|
|
550
|
-
setImmediate(() => {
|
|
551
|
-
let edgeNode = getBootedEdgeNode();
|
|
552
|
-
if (!edgeNode) {
|
|
553
|
-
throw new Error(`No edge node set during edgeBootstrap? This should be impossible.`);
|
|
554
|
-
}
|
|
555
|
-
let nodes = [edgeNode.host];
|
|
556
|
-
allNodeIds2 = new Set(nodes);
|
|
557
|
-
discoveryReady.resolve();
|
|
558
|
-
nodeBroadcasted.resolve();
|
|
559
|
-
|
|
560
|
-
// NOTE: We run into TLS issues (as in, our servers use self signed certs), if we try to talk to just
|
|
561
|
-
// any node, so... we better just talk to the edge node
|
|
562
|
-
// - We COULD probably just use some special domain (maybe JUST the machine domain?), with limited wildcard
|
|
563
|
-
// certs (I think we can only wildcard a single depth anyways), and A records for the machines too...
|
|
564
|
-
// but... having all traffic route through an edge node is probably better anyways...
|
|
565
|
-
nodeOverrides = nodes;
|
|
566
|
-
});
|
|
567
|
-
}
|
|
568
|
-
}
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
export async function forceRemoveNode(nodeId: string) {
|
|
572
|
-
await archives().del(nodeId);
|
|
573
|
-
void tellEveryoneNodesChanges(`forceRemoveNode ${nodeId}`);
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
/** Called on shutdown, to completely remove this node from discovery. */
|
|
578
|
-
export async function nodeDiscoveryShutdown() {
|
|
579
|
-
console.log(red(`Shutting down node discovery`));
|
|
580
|
-
shutdown = true;
|
|
581
|
-
if (isServer()) {
|
|
582
|
-
await archives().del(getOwnNodeId());
|
|
583
|
-
}
|
|
584
|
-
void tellEveryoneNodesChanges("nodeDiscoveryShutdown");
|
|
585
|
-
}
|
|
586
|
-
const tellEveryoneNodesChanges = throttleFunction(1000, function tellEveryoneNodesChanges(reason: string) {
|
|
587
|
-
if (isClient()) return;
|
|
588
|
-
console.log(red(`Telling everyone nodes changed`));
|
|
589
|
-
for (let nodeId of allNodeIds2) {
|
|
590
|
-
if (isOwnNodeId(nodeId)) continue;
|
|
591
|
-
ignoreErrors(NodeDiscoveryController.nodes[nodeId].resyncNodes(reason));
|
|
592
|
-
}
|
|
593
|
-
});
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
/**
 * Remote-callable surface for node discovery. An instance is registered with
 * `SocketFunction.register` below, which also attaches the per-method hooks.
 */
class NodeDiscoveryControllerBase {
    /** Liveness probe: answering at all is the signal, so this always resolves to true. */
    async isAlive(): Promise<boolean> {
        return true;
    }

    /** Records `nodeId` in the in-memory node set (no archive access). */
    async addNode(nodeId: string): Promise<boolean> {
        console.log(magenta(`Received addNode`), { nodeId });
        addNodeId(nodeId);
        return true;
    }

    /** Re-reads the node list via `syncArchives`, logging who asked and why. */
    async resyncNodes(reason: string): Promise<void> {
        const { nodeId: callerNodeId } = SocketFunction.getCaller();
        console.log(magenta(`Received resyncNodes from ${callerNodeId}, reason = ${reason}`));
        await syncArchives();
    }

    /** Hash of this node's current view of the node list. */
    async getAllNodesHash(): Promise<string> {
        return getAllNodesHash();
    }

    /** Snapshot of this node's in-memory node list. */
    async getAllNodeIds(): Promise<string[]> {
        const snapshot = Array.from(allNodeIds2);
        return snapshot;
    }

    /** The node id this process is mounted as, as reported by `SocketFunction.mountedNodeId`. */
    async getNodeId() {
        return SocketFunction.mountedNodeId;
    }

    /** Whether this process reports `isNoNetwork()`. */
    async isNoNetwork() {
        return isNoNetwork();
    }
}
|
|
624
|
-
const NodeDiscoveryController = SocketFunction.register(
|
|
625
|
-
"NodeDiscoveryController-7991037e-fd9e-4085-b1db-52035487e72c",
|
|
626
|
-
new NodeDiscoveryControllerBase(),
|
|
627
|
-
() => ({
|
|
628
|
-
isAlive: {},
|
|
629
|
-
addNode: { hooks: [requiresNetworkTrustHook] },
|
|
630
|
-
resyncNodes: { hooks: [requiresNetworkTrustHook] },
|
|
631
|
-
getAllNodesHash: { hooks: [requiresNetworkTrustHook] },
|
|
632
|
-
// Skip client hooks, so we don't block on authentication (IdentityController), as some of these functions
|
|
633
|
-
// are needed for authentication to finish!
|
|
634
|
-
getAllNodeIds: { noClientHooks: true, noDefaultHooks: true },
|
|
635
|
-
getNodeId: { noClientHooks: true, noDefaultHooks: true },
|
|
636
|
-
isNoNetwork: { noClientHooks: true, noDefaultHooks: true },
|
|
637
|
-
}),
|
|
638
|
-
() => ({
|
|
639
|
-
|
|
640
|
-
})
|
|
1
|
+
import { SocketFunction } from "socket-function/SocketFunction";
|
|
2
|
+
import { getArchives } from "../-a-archives/archives";
|
|
3
|
+
import { getDomain, isDevDebugbreak, isNoNetwork, isPublic } from "../config";
|
|
4
|
+
import { measureBlock } from "socket-function/src/profiling/measure";
|
|
5
|
+
import { isNode, keyByArray, sha256Hash, throttleFunction, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
6
|
+
import { errorToUndefinedSilent, ignoreErrors, logErrors, timeoutToError, timeoutToUndefinedSilent } from "../errors";
|
|
7
|
+
import { ensureWeAreTrusted, requiresNetworkTrustHook } from "../-d-trust/NetworkTrust2";
|
|
8
|
+
import { delay, runInfinitePoll, runInfinitePollCallAtStart } from "socket-function/src/batching";
|
|
9
|
+
import { getCallFactory, getCreateCallFactory, getNodeId, getNodeIdFromLocation, getNodeIdLocation } from "socket-function/src/nodeCache";
|
|
10
|
+
import { cache, lazy } from "socket-function/src/caching";
|
|
11
|
+
import { shuffle } from "../misc/random";
|
|
12
|
+
import { blue, green, magenta, red, yellow } from "socket-function/src/formatting/logColors";
|
|
13
|
+
import { PromiseObj } from "../promise";
|
|
14
|
+
import { formatDateTime } from "socket-function/src/formatting/format";
|
|
15
|
+
import { isClient, isServer } from "../config2";
|
|
16
|
+
import { waitForFirstTimeSync } from "socket-function/time/trueTimeShim";
|
|
17
|
+
import { decodeNodeId, decodeNodeIdAssert } from "../-a-auth/certs";
|
|
18
|
+
|
|
19
|
+
import { isDefined } from "../misc";
|
|
20
|
+
import { getBootedEdgeNode } from "../-0-hooks/hooks";
|
|
21
|
+
import { EdgeNodeConfig } from "../4-deploy/edgeNodes";
|
|
22
|
+
import * as certs from "../-a-auth/certs";
|
|
23
|
+
import { logDisk } from "../diagnostics/logs/diskLogger";
|
|
24
|
+
import { MaybePromise } from "socket-function/src/types";
|
|
25
|
+
import { getPathStr2 } from "../path";
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
// Tuning constants
// ---------------------------------------------------------------------------

/** Period of the loop that rewrites this node's own heartbeat. */
const HEARTBEAT_INTERVAL = timeInMinute * 15;
/** Base period at which we check the heartbeats of other nodes. */
const CHECK_INTERVAL = HEARTBEAT_INTERVAL;
/** A heartbeat older than this fails the dead check. */
const DEAD_THRESHOLD = HEARTBEAT_INTERVAL * 2;
/**
 * If a node fails the dead check this many times, we remove it from storage and tell
 * all other nodes so they can drop it from memory as well.
 */
const DEAD_CHECK_COUNT = 4;
/**
 * If our own stored heartbeat is missing or older than this, we have to kill our own
 * process. Otherwise it may be too out of sync, and might commit unverified data to the disk.
 */
const SUICIDE_HEARTBEAT_THRESHOLD = HEARTBEAT_INTERVAL * 4;

/** Minimum time between node list refreshes for non-server Node.js processes. */
const CLIENTSIDE_POLL_RATE = timeInMinute * 30;

/**
 * How often the node list is re-read from the archives.
 * We can't poll backblaze too often. Once a minute starts to cost around 10 cents per month,
 * so once a second would cost 6 USD per month, per service... which starts to get expensive.
 */
const DISK_AUDIT_RATE = timeInMinute * 15;
/**
 * How often we ask other nodes for their in-memory node list.
 * We CAN poll our API frequently. The overhead for calls should be far less than 1ms, and
 * the bandwidth should also be effectively nothing. Maybe 2.5GB per month if we send a
 * request per second, and each request is 1000 bytes (which, as we use websockets, is
 * probably less than that). That is around 2.5 cents on digital ocean IF we go over
 * our 1TB/month allowance.
 */
const API_AUDIT_RATE = timeInSecond * 30;
/** Maximum number of nodes asked per API audit. */
const API_AUDIT_COUNT = 12;

/** How long a node that failed to answer an API audit is skipped before being asked again. */
const DEAD_NODE_POLL_COOLDOWN = timeInMinute * 5;

// ---------------------------------------------------------------------------
// Module state
// ---------------------------------------------------------------------------

/** Set by nodeDiscoveryShutdown; stops heartbeat writes and heartbeat audits. */
let shutdown = false;

/** Archive store that holds one entry per node, under the "nodes/" prefix. */
const archives = lazy(function openNodeArchives() {
    return getArchives("nodes/");
});

/** Whether node discovered / removed messages are printed. */
let logging = true;
|
|
60
|
+
/** Turns on the "Discovered node" / "Removed node" console output. */
export function enableNodeDiscoveryLogging(): void {
    logging = true;
}
|
|
63
|
+
/** Reports whether node discovery console output is currently enabled. */
export function isNodeDiscoveryLogging(): boolean {
    const enabled = logging;
    return enabled;
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
/**
 * Returns alternate ids for `nodeId`. Only a node on our own machine has one: the same
 * domain and port addressed through the "127-0-0-1." prefix. Yields `undefined` for
 * nodes on other machines or for ids that cannot be decoded.
 */
function getAlternateNodeIds(nodeId: string): MaybePromise<string[] | undefined> {
    const onOurMachine = certs.getMachineId(nodeId) === getOwnMachineId();
    if (!onOurMachine) return undefined;

    const parts = decodeNodeId(nodeId);
    if (!parts) return undefined;

    const loopbackId = "127-0-0-1." + parts.domain + ":" + parts.port;
    return [loopbackId];
}
// Install the lookup on SocketFunction's GET_ALTERNATE_NODE_IDS slot.
SocketFunction.GET_ALTERNATE_NODE_IDS = getAlternateNodeIds;
|
|
81
|
+
|
|
82
|
+
/** Placeholder id returned by getOwnNodeId while this process has no mounted node id. */
export const SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE = "SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE";

/** Alias of getOwnNodeId. */
export const getOurNodeId = getOwnNodeId;
/** Alias of getOwnNodeIdAssert. */
export const getOurNodeIdAssert = getOwnNodeIdAssert;
|
|
86
|
+
/**
 * Returns our mounted node id, or SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE when
 * `SocketFunction.mountedNodeId` is still empty. Never throws.
 */
export function getOwnNodeId(): string {
    return SocketFunction.mountedNodeId || SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE;
}
|
|
93
|
+
/** Returns `SocketFunction.mountedNodeId` as-is, which is falsy until the node is mounted. */
export function getMountNodeId(): string | undefined | "" {
    const mounted = SocketFunction.mountedNodeId;
    return mounted;
}
|
|
96
|
+
/**
 * Returns our mounted node id.
 * @throws Error when the node has not been mounted yet.
 */
export function getOwnNodeIdAssert(): string {
    const mounted = SocketFunction.mountedNodeId;
    if (mounted) return mounted;
    throw new Error(`Node must be mounted before nodeId is accessed`);
}
|
|
103
|
+
|
|
104
|
+
/** Our thread id, taken from `certs.getOwnThreadId()` and wrapped in `lazy`. */
export const getOwnThreadId = lazy(() => certs.getOwnThreadId());
|
|
107
|
+
/** Our machine id, taken from `certs.getOwnMachineId()` and wrapped in `lazy`. */
export const getOwnMachineId = lazy(() => certs.getOwnMachineId());
|
|
110
|
+
|
|
111
|
+
/**
 * Decides whether `nodeId` refers to this process. True when it equals our own id,
 * is the unmounted placeholder, or is a "127-0-0-1." address of our domain on the
 * same port as our own node id.
 */
export function isOwnNodeId(nodeId: string): boolean {
    if (nodeId === getOwnNodeId()) return true;
    if (nodeId === SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE) return true;

    // If it's 127.0.0.1, and on the same port as us, it is us.
    const decoded = decodeNodeId(nodeId);
    if (!decoded) return false;
    if (decoded.domain !== "127-0-0-1." + getDomain()) return false;
    return decoded.port === decodeNodeId(getOwnNodeId())?.port;
}
|
|
126
|
+
|
|
127
|
+
/**
 * True when `nodeId` carries our machine id, or when its decoded machine id is the
 * loopback marker "127-0-0-1".
 */
export function isNodeIdOnOwnMachineId(nodeId: string): boolean {
    if (certs.getMachineId(nodeId) === getOwnMachineId()) {
        return true;
    }
    const decoded = decodeNodeId(nodeId);
    return decoded?.machineId === "127-0-0-1";
}
|
|
130
|
+
|
|
131
|
+
/**
 * Compares two node ids. Identical strings are equal; otherwise the ids are treated
 * as equal only when both are on our own machine and share the same port.
 */
export function areNodeIdsEqual(lhs: string, rhs: string): boolean {
    if (lhs === rhs) return true;
    if (!isNodeIdOnOwnMachineId(lhs)) return false;
    if (!isNodeIdOnOwnMachineId(rhs)) return false;
    const rhsPort = getNodeIdLocation(rhs)?.port;
    const lhsPort = getNodeIdLocation(lhs)?.port;
    return rhsPort === lhsPort;
}
|
|
135
|
+
|
|
136
|
+
/** When set, getAllNodeIds returns this list instead of the discovered node set. */
let nodeOverrides: string[] | undefined;
/** Gate awaited at the start of getAllNodeIds; replaced further down in this module. */
let beforeGetNodeAllId = async () => { };

/**
 * Resolves to the node ids callers should talk to: `nodeOverrides` when set,
 * otherwise a snapshot of the discovered node set.
 */
export async function getAllNodeIds() {
    await beforeGetNodeAllId();

    // NOTE: We always wait for the time sync before returning any node ids. Because if we try to talk
    //  to remote nodes with a massively out of sync time, they will likely reject our messages!
    const pendingTimeSync = waitForFirstTimeSync();
    if (pendingTimeSync) {
        await measureBlock(async () => {
            await pendingTimeSync;
        }, "waitForFirstTimeSync");
    }

    return nodeOverrides ? nodeOverrides : Array.from(allNodeIds2);
}
|
|
155
|
+
|
|
156
|
+
/** Public entry point that runs one `syncArchives` pass immediately. */
export async function syncNodesNow(): Promise<void> {
    await syncArchives();
}
|
|
159
|
+
|
|
160
|
+
/**
 * Registers `callback` to receive the full node list now and after every change.
 * NOTE: You should try to use watchDeltaNodeIds instead of this.
 *
 * The callback is invoked synchronously once during registration. There is no way to unsubscribe.
 */
export function watchNodeIds(callback: (nodeIds: string[]) => void) {
    const notify = () => {
        const snapshot = Array.from(allNodeIds2);
        callback(snapshot);
    };
    nodeIdWatchers.push(notify);
    notify();
}
|
|
168
|
+
/**
 * Like watchNodeIds, but reports only what changed since the previous notification.
 *
 * `callback` receives the ids that appeared (`newNodeIds`) and the ids that vanished
 * (`removedNodeIds`), and is skipped entirely when neither list has entries. The first
 * notification compares against an empty list, so every currently known node is reported as new.
 *
 * Membership checks use Sets, so each notification costs O(n) instead of the O(n²)
 * of nested `Array.includes` scans. Result order is unchanged.
 */
export function watchDeltaNodeIds(callback: (config: { newNodeIds: string[]; removedNodeIds: string[] }) => void) {
    let prevNodeIds: string[] = [];
    let prevNodeIdSet = new Set<string>();
    watchNodeIds(nodeIds => {
        let nodeIdSet = new Set(nodeIds);
        let newNodeIds = nodeIds.filter(nodeId => !prevNodeIdSet.has(nodeId));
        let removedNodeIds = prevNodeIds.filter(nodeId => !nodeIdSet.has(nodeId));
        if (newNodeIds.length > 0 || removedNodeIds.length > 0) {
            callback({ newNodeIds, removedNodeIds });
        }
        // Only advance the baseline after the callback returned (same as before: a throwing
        //  callback leaves the baseline untouched, so the delta is reported again next time).
        prevNodeIds = nodeIds;
        prevNodeIdSet = nodeIdSet;
    });
}
|
|
179
|
+
|
|
180
|
+
/** In-memory set of every node id we currently know about. */
let allNodeIds2 = new Set<string>();

/**
 * Hashes the current node set. The ids are sorted first so that the hash does not depend on
 * insertion order. Only ids present in `allNodeIds2` take part; nothing is added here.
 */
function getAllNodesHash() {
    const ordered = Array.from(allNodeIds2).sort();
    const serialized = JSON.stringify(ordered);
    return sha256Hash(serialized);
}
|
|
187
|
+
/** Adds a single node id to the in-memory set. Thin wrapper around addNodeIdBase. */
function addNodeId(nodeId: string): void {
    addNodeIdBase(nodeId);
}
|
|
190
|
+
/**
 * Inserts `nodeId` into the in-memory set and notifies the watchers.
 * Does nothing (no log, no notification) when the id is already known.
 */
function addNodeIdBase(nodeId: string) {
    const alreadyKnown = allNodeIds2.has(nodeId);
    if (alreadyKnown) return;

    if (logging) console.log(blue(`Discovered node ${nodeId}`));

    allNodeIds2.add(nodeId);
    onNodesChanged();
}
|
|
198
|
+
/**
 * Replaces the in-memory node set with `nodeIds` (minus the unmounted placeholder id).
 * Watchers are only notified when at least one id was added or removed.
 */
async function setNodeIds(nodeIds: string[]) {
    nodeIds = nodeIds.filter(candidate => candidate !== SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE);

    console.info("setNodeIds", { nodeIds });

    const added = nodeIds.filter(candidate => !allNodeIds2.has(candidate));
    const incoming = new Set(nodeIds);
    const dropped = Array.from(allNodeIds2).filter(known => !incoming.has(known));

    const unchanged = added.length === 0 && dropped.length === 0;
    if (unchanged) return;

    if (logging) {
        for (const id of added) {
            console.log(blue(`Discovered node ${id}`));
        }
        for (const id of dropped) {
            console.log(red(`Removed node from setNodeIds ${id}`));
        }
    }

    allNodeIds2 = new Set(nodeIds);
    onNodesChanged();
}
|
|
217
|
+
/** Callbacks registered through watchNodeIds. */
let nodeIdWatchers: (() => void)[] = [];

/**
 * Invokes every registered watcher. An exception in one watcher is logged and
 * does not stop the remaining watchers from running.
 */
function onNodesChanged() {
    // The length is re-read on every iteration, so watchers appended during the loop still run.
    for (let index = 0; index < nodeIdWatchers.length; index++) {
        const notify = nodeIdWatchers[index];
        try {
            notify();
        } catch (e: any) {
            console.log(red(`Ignoring error from callback ${e.stack}`));
        }
    }
}
|
|
227
|
+
|
|
228
|
+
/** Node id of the root discovery node. Empty until configured or updated. */
let rootDiscoveryNodeId = "";

/** Sets the root discovery node from an explicit domain and port. */
export function configRootDiscoveryLocation(config: {
    domain: string;
    port: number;
}) {
    const { domain, port } = config;
    rootDiscoveryNodeId = getNodeId(domain, port);
}
|
|
235
|
+
|
|
236
|
+
/**
 * Switches the root discovery node to `edgeNode.host`.
 *
 * Every `require.cache` entry whose path contains the previous root node id is re-keyed
 * under the new id (filename, id, and the `original` bookkeeping including its `requests`
 * map are rewritten as well). Afterwards the edge node is published on
 * `globalThis.BOOTED_EDGE_NODE`, becomes the root discovery node, and is added to the
 * in-memory node set.
 *
 * @throws Error when not running as a client. This is checked first, because
 *  getBrowserUrlNode throws its own, less specific, error for non-clients.
 */
export function updateRootDiscoveryLocation(edgeNode: EdgeNodeConfig) {
    if (!isClient()) {
        throw new Error(`updateRootDiscoveryLocation should only be called in the browser. If not a client, you should be able to discovery new nodes automatically`);
    }
    let prevRootId = getBrowserUrlNode();
    let nodeId = edgeNode.host;
    // Snapshot the entries, as the cache is mutated while we walk it.
    let nodeCache = Object.entries(require.cache);
    for (let [path, module] of nodeCache) {
        if (!module) continue;
        if (!path.includes(prevRootId)) continue;
        let newPath = path.replace(prevRootId, nodeId);
        // Remove the old module
        delete require.cache[path];
        // Add the new module
        require.cache[newPath] = module;
        module.filename = module.filename.replace(prevRootId, nodeId);
        module.id = module.id.replace(prevRootId, nodeId);
        if (module.original) {
            module.original.filename = module.original.filename.replace(prevRootId, nodeId);
            module.original.originalId = module.original.originalId.replace(prevRootId, nodeId);
            // Object.entries is a snapshot, so re-keying requests inside the loop is safe.
            for (let [key, value] of Object.entries(module.original.requests)) {
                if (key.includes(prevRootId)) {
                    let newKey = key.replace(prevRootId, nodeId);
                    delete module.original.requests[key];
                    module.original.requests[newKey] = value;
                    key = newKey;
                }
                if (value.includes(prevRootId)) {
                    value = value.replace(prevRootId, nodeId);
                    module.original.requests[key] = value;
                }
            }
        }
    }
    globalThis.BOOTED_EDGE_NODE = edgeNode;
    rootDiscoveryNodeId = nodeId;
    addNodeId(nodeId);
}
// Also exposed as a global.
(globalThis as any).updateRootDiscoveryLocation = updateRootDiscoveryLocation;
|
|
275
|
+
|
|
276
|
+
/**
 * Returns the node id of the root discovery node.
 *
 * NOTE: Can also be called from a Node.js client, if configRootDiscoveryLocation was called
 *  first (in the browser it can always be called, as the id is derived from the page location).
 *
 * @throws Error when not a client, or when no root location is configured and none can be derived.
 */
export function getBrowserUrlNode() {
    if (!isClient()) throw new Error(`getBrowserUrlNode can only be called when isClient()`);

    const configured = rootDiscoveryNodeId;
    // Only the browser can fall back to its own location.
    const resolved = configured || (isNode() ? "" : getNodeIdFromLocation());
    if (resolved) return resolved;

    throw new Error(`configRootDiscoveryLocation wasn't called, and ---client appears to have been used (isNode() and isClient()). Call configRootDiscoveryLocation with the domain and port of the server you wish to use (ex, querysub.com:1111)`);
}
|
|
288
|
+
|
|
289
|
+
/**
 * Resyncs our node list, then announces our own node id to every other node via `addNode`.
 * Each announcement is capped at 5 seconds and its errors are swallowed.
 */
export async function triggerNodeChange() {
    await syncNodesNow();

    const announce = async (peerId: string) => {
        if (isOwnNodeId(peerId)) return;
        const call = NodeDiscoveryController.nodes[peerId].addNode(getOwnNodeId());
        await timeoutToUndefinedSilent(timeInSecond * 5, errorToUndefinedSilent(call));
    };
    const peers = Array.from(allNodeIds2);
    await Promise.allSettled(peers.map(announce));
}
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
/**
 * Removes archive entries of threads that are provably dead, and returns the node ids
 * that remain in the archives afterwards.
 *
 * Node ids are grouped by (machineId, port). If any member of a group answers `isAlive`,
 * the port is evidently owned by that thread, so all other members of the group are deleted.
 *
 * Compared to the previous version:
 *  - The returned list no longer contains the ids that were just deleted, so callers do not
 *    re-add them or read their (now missing) heartbeats.
 *  - Groups with a single member are skipped, as there is nothing to remove and the
 *    `isAlive` round trip was wasted.
 *  - The removal log names the node that was removed.
 *
 * NOTE(review): if more than one member of a group answers, the last answer wins and the
 *  others are deleted. This is unchanged; confirm whether that situation can actually occur.
 */
async function clearDeadThreadsFromArchives() {
    let nodes = await archives().find("");

    // Ids that cannot be decoded get an undefined key and are never touched.
    function getPortHash(nodeId: string) {
        let obj = decodeNodeId(nodeId);
        if (!obj) return undefined;
        return getPathStr2(obj.machineId, obj.port + "");
    }
    let byPortHash = keyByArray(nodes, getPortHash);
    let removedNodeIds = new Set<string>();
    for (let [portHash, nodeIds] of Array.from(byPortHash)) {
        if (!portHash) continue;
        // A lone node on a port has no stale siblings to clean up.
        if (nodeIds.length < 2) continue;
        let aliveNodeId = "";
        await Promise.all(nodeIds.map(async nodeId => {
            let alive = await errorToUndefinedSilent(NodeDiscoveryController.nodes[nodeId].isAlive());
            if (alive) {
                aliveNodeId = nodeId;
            }
        }));
        if (!aliveNodeId) continue;
        let deadThreads = nodeIds.filter(nodeId => nodeId !== aliveNodeId);
        await Promise.all(deadThreads.map(async deadNodeId => {
            console.log(`Removing dead thread. We contacted a node on the same port and same machine (${aliveNodeId}), which means the port has been reused by another thread, which proves that the old thread has died, as otherwise the new thread would not be able to use it.`, { deadNodeId });
            await archives().del(deadNodeId);
            removedNodeIds.add(deadNodeId);
        }));
    }

    return nodes.filter(nodeId => !removedNodeIds.has(nodeId));
}
|
|
328
|
+
|
|
329
|
+
/**
 * Refreshes the in-memory node list from the authoritative source for this process:
 *  - server: the archives (after making sure our own heartbeat is present),
 *  - browser or no-network client: only the root discovery node,
 *  - networked Node.js client: the node list reported by the root discovery node.
 */
async function syncArchives() {
    if (isServer()) {
        // Make sure we are present
        await writeHeartbeat();
        const nodeIds = await archives().find("");
        console.info(`Synced node ids from archives`, { nodeIds });
        await setNodeIds(nodeIds);
        return;
    }

    if (isNoNetwork() || !isNode()) {
        // NOTE: If no network, our trust source might be different, so we can't talk to regular nodes,
        //  and instead have to only talk to HTTP nodes
        await setNodeIds([getBrowserUrlNode()]);
        return;
    }

    // If on the network, NetworkTrust2 should sync the trusted machines from backblaze, so we should be
    //  able to talk to any nodes.
    //  - If the user is using --client they only want to talk to querysub nodes. There might be multiple,
    //      which cloudflare will proxy, HOWEVER, it is more efficient to directly access the node list, which
    //      will be better for load balancing and updating on failure than the cloudflare proxying... probably.
    const rootNode = NodeDiscoveryController.nodes[getBrowserUrlNode()];
    await setNodeIds(await rootNode.getAllNodeIds());
}
|
|
351
|
+
|
|
352
|
+
/**
 * Periodically audits the heartbeats stored in the archives.
 *
 * A node whose heartbeat is older than DEAD_THRESHOLD gets a strike. After DEAD_CHECK_COUNT
 * strikes its archive entry is deleted. A fresh heartbeat clears the strikes. When
 * anything was deleted we resync and ask the other nodes to resync as well.
 */
async function runHeartbeatAuditLoop() {
    await getAllNodeIds();
    const strikes = new Map<string, number>();
    // 90% of the normal interval, so we don't run at the same time as the other audit
    await runInfinitePollCallAtStart(CHECK_INTERVAL * 0.9, async () => {
        if (shutdown) return;
        console.log(magenta(`Auditing node list`));
        // Wait a bit longer, to try to prevent all nodes from synchronizing their audit times.
        await delay(CHECK_INTERVAL * Math.random() * 0.1);

        const deadTime = Date.now() - DEAD_THRESHOLD;

        const nodeIds = await clearDeadThreadsFromArchives();
        // We spent the money checking the node list, so we might as well update it
        await setNodeIds(nodeIds);

        let pendingDeadCount = 0;
        const removedNodeIds: string[] = [];

        for (const nodeId of nodeIds) {
            const stored = await archives().get(nodeId);
            // A missing or unparsable heartbeat counts as time 0.
            const lastTime = Number(stored?.toString()) || 0;

            if (lastTime >= deadTime) {
                strikes.delete(nodeId);
                console.info("Read node heartbeat", { nodeId, lastTime });
                continue;
            }

            // Increment the dead count
            const count = (strikes.get(nodeId) || 0) + 1;
            strikes.set(nodeId, count);

            if (count < DEAD_CHECK_COUNT) {
                console.log(yellow(`Node ${nodeId} was found to be dead, last heartbeat at ${formatDateTime(lastTime)} < dead threshold at ${formatDateTime(deadTime)}, dead count ${count}/${DEAD_CHECK_COUNT}. Total nodes seen ${nodeIds.length}`));
                pendingDeadCount++;
                continue;
            }

            removedNodeIds.push(nodeId);
            console.log(yellow(`Node ${nodeId} was found to be dead, removing from node list. Last heartbeat at ${formatDateTime(lastTime)}, dead threshold at ${formatDateTime(deadTime)}`));
            await archives().del(nodeId);
            strikes.delete(nodeId);
        }

        if (pendingDeadCount) {
            console.log(blue(`Pending dead nodes ${pendingDeadCount}/${nodeIds.length}`));
        }

        if (removedNodeIds.length > 0) {
            console.log(blue(`Removed ${removedNodeIds.length}/${nodeIds.length} nodes from node list`), { removedNodeIds });
            await syncArchives();
            await tellEveryoneNodesChanges(`removedNodeIds ${removedNodeIds.join("|")}`);
        }
    });
}
|
|
404
|
+
|
|
405
|
+
// NOTE: Not just dead nodes, but also nodes that are unreachable, such as developer nodes.
/** Node id => time at which it last failed to answer an API audit. */
let deadNodes = new Map<string, number>();

/**
 * Cheap consistency check against other nodes' in-memory node lists.
 *
 * Asks up to API_AUDIT_COUNT randomly chosen nodes for their node list. Nodes that do not
 * answer are skipped for DEAD_NODE_POLL_COOLDOWN. If any answer contains a node we do
 * not know, we resync from the authoritative source.
 *
 * The remote call is wrapped in errorToUndefinedSilent (in addition to the timeout), like
 * every other remote call in this file. Without it a rejected call could reject the whole
 * Promise.all, aborting the sync before the unreachable node is recorded in `deadNodes`.
 */
async function fastMemorySync() {
    let aliveNodes = new Set(Array.from(allNodeIds2).filter(x => !isOwnNodeId(x)));
    let deadThreshold = Date.now() - DEAD_NODE_POLL_COOLDOWN;
    for (let [nodeId, time] of Array.from(deadNodes)) {
        if (time < deadThreshold) {
            // Cooldown expired, the node may be polled again.
            deadNodes.delete(nodeId);
        } else {
            aliveNodes.delete(nodeId);
        }
    }
    let checkNodes = shuffle(Array.from(aliveNodes), Date.now()).slice(0, API_AUDIT_COUNT);
    let otherNodesAll = await Promise.all(
        checkNodes.map(async nodeId => {
            let nodes = await timeoutToUndefinedSilent(
                200,
                errorToUndefinedSilent(NodeDiscoveryController.nodes[nodeId].getAllNodeIds())
            );
            if (!nodes) {
                deadNodes.set(nodeId, Date.now());
            }
            return nodes || [];
        })
    );
    let otherNodes = Array.from(new Set(otherNodesAll.flat()));
    // This would log WAY too much, because we poll a lot, because we want to minimize downtime
    //console.log(magenta(`Fast memory sync at ${formatVeryNiceDateTime(Date.now())}, nodes found ${otherNodes.length}`), getDebuggerUrl());

    // If they are missing nodes that's fine. We constantly have extra nodes, and have to function correctly
    //  with extra nodes. However, if we are missing nodes, we'd prefer to have them quickly, so we should
    //  sync now.
    let missingNodes = otherNodes.filter(nodeId => !allNodeIds2.has(nodeId));
    if (missingNodes.length > 0) {
        console.log(yellow(`Node list is missing nodes, resyncing node`), { missingNodes, otherNodes });
        await syncArchives();
    }
}
|
|
440
|
+
|
|
441
|
+
/**
 * Starts the two background refresh polls once the node list is available: the
 * archive resync (every DISK_AUDIT_RATE) and the peer comparison (every API_AUDIT_RATE).
 */
async function runMemoryAuditLoop() {
    await getAllNodeIds();

    const polls: [number, () => Promise<void>][] = [
        [DISK_AUDIT_RATE, syncArchives],
        [API_AUDIT_RATE, fastMemorySync],
    ];
    for (const [rate, task] of polls) {
        runInfinitePoll(rate, task);
    }
}
|
|
446
|
+
|
|
447
|
+
/**
 * Writes the current time as our heartbeat into the archives, keyed by our mounted node id.
 *
 * Does nothing after shutdown, or while the node is not mounted yet. The "Writing heartbeat"
 * message is now only printed when a heartbeat is actually written (previously it was also
 * printed, with an empty node id, when the function then returned without writing).
 */
async function writeHeartbeat() {
    if (shutdown) return;
    let nodeId = getMountNodeId();
    // We can't heartbeat until we are mounted.
    if (!nodeId) return;
    let now = Date.now();
    console.log(green(`Writing heartbeat ${formatDateTime(now)} for self (${nodeId})`));
    await archives().set(nodeId, Buffer.from(now + ""));
}
|
|
455
|
+
|
|
456
|
+
/**
 * Server startup sequence for node discovery, followed by the endless heartbeat loop:
 *  1. Initial sync, after which `discoveryReady` resolves.
 *  2. Wait for the mount, write our heartbeat, resync, and verify we see ourselves.
 *  3. Announce ourselves to all other nodes, after which `nodeBroadcasted` resolves.
 *  4. Heartbeat forever. On the network, terminate the process if our own heartbeat was
 *     removed or is too old. Without network, re-announce ourselves instead.
 */
async function runMainSyncLoops() {
    await syncArchives();

    discoveryReady.resolve();

    // We can't heartbeat until we mount
    await SocketFunction.mountPromise;

    // We have to write before we call NodeDiscoveryController.addNode
    await writeHeartbeat();
    await syncArchives();
    const selfNodeId = SocketFunction.mountedNodeId;
    const seesSelf = allNodeIds2.has(selfNodeId);
    if (!seesSelf) {
        throw new Error(`Failed sanity check, our node id didn't appear in archives after we just wrote it? Missing ${SocketFunction.mountedNodeId}`);
    }

    // NOTE: Our first broadcast is special
    const firstAnnounce = async (peerId: string) => {
        if (isOwnNodeId(peerId)) return;
        // Ignore errors, but wait a bit, so hopefully 99.99% of the time we can be certain
        //  all other nodes know our node id at this point.
        const call = NodeDiscoveryController.nodes[peerId].addNode(getOwnNodeId());
        await timeoutToUndefinedSilent(timeInSecond * 5, errorToUndefinedSilent(call));
    };
    await Promise.allSettled(Array.from(allNodeIds2).map(firstAnnounce));

    nodeBroadcasted.resolve();

    console.log(magenta(`Node discovery is loaded`));

    await runInfinitePollCallAtStart(HEARTBEAT_INTERVAL, async function nodeDiscoverHeartbeat() {
        if (isNoNetwork()) {
            // We DO have to re-broadcast, otherwise no one will know we exist
            for (const peerId of allNodeIds2) {
                if (isOwnNodeId(peerId)) continue;
                // Ignore errors, as nodes die often, and we only want to log once - when we actually remove the node id.
                ignoreErrors(NodeDiscoveryController.nodes[peerId].addNode(getOwnNodeId()));
            }
        } else {
            // If we waited too long, other nodes might think we are dead. In which case, we SHOULD terminate.
            // FIRST, verify we didn't delay too long (to make sure we kill any nodes that were disconnected
            //  from the internet for too long)
            const stored = await archives().get(selfNodeId);
            const lastTime = Number(stored?.toString());
            const suicideThreshold = Date.now() - SUICIDE_HEARTBEAT_THRESHOLD;
            const heartbeatMissing = !lastTime;
            if (heartbeatMissing || lastTime < suicideThreshold) {
                console.error(red(
                    heartbeatMissing
                        ? `Self node was removed due to not heartbeating. Terminating self process, as it likely has very stale data.`
                        : `Self node was has very old heartbeat. Terminating self process, as it likely has very stale data.`
                ));
                process.exit();
            }
        }

        await writeHeartbeat();
    });
}
|
|
510
|
+
|
|
511
|
+
/** Settled once the first node list is available (or discovery failed to start). */
const discoveryReady = new PromiseObj<void>();
/** Resolved once our own node id has been announced to the other nodes. */
const nodeBroadcasted = new PromiseObj<void>();

// Default gate for getAllNodeIds: block until discovery is ready.
beforeGetNodeAllId = async function waitForDiscoveryReady() {
    await discoveryReady.promise;
};
|
|
516
|
+
/** Resolves once getAllNodeIds can return, discarding the node list itself. */
export async function onNodeDiscoveryReady(): Promise<void> {
    await getAllNodeIds();
}
|
|
519
|
+
|
|
520
|
+
/** Resolves once `nodeBroadcasted` has been resolved. */
export async function onNodeBroadcasted(): Promise<void> {
    await nodeBroadcasted.promise;
}
|
|
523
|
+
// ---------------------------------------------------------------------------
// Module bootstrap. Picks one of three discovery modes:
//  - server: heartbeat + audit loops against the archives,
//  - Node.js client: lazily resync at most every CLIENTSIDE_POLL_RATE,
//  - browser: talk exclusively to the booted edge node.
// ---------------------------------------------------------------------------
if (isServer()) {
    setImmediate(async () => {

        logErrors(runHeartbeatAuditLoop());
        logErrors(runMemoryAuditLoop());
        // NOTE: We used to wait until we mounted, but... we should be able to find nodes
        //  before we mount, right? (And what if we never mount?)
        runMainSyncLoops().catch(e => {
            // Make anyone waiting on discovery fail, instead of hanging forever.
            discoveryReady.reject(e);
            logErrors(Promise.reject(e));
        });
    });
} else {

    if (isNode()) {
        discoveryReady.resolve();
        nodeBroadcasted.resolve();
        // Just get the archives, syncing again if we haven't synced in a while
        let lastGetTime = 0;
        beforeGetNodeAllId = async () => {
            let lastGetThreshold = lastGetTime + CLIENTSIDE_POLL_RATE;
            if (Date.now() > lastGetThreshold) {
                // Stamped before the await, so concurrent callers don't start a second sync.
                lastGetTime = Date.now();
                await syncArchives();
            }
        };
    } else {
        setImmediate(() => {
            let edgeNode = getBootedEdgeNode();
            if (!edgeNode) {
                let error = new Error(`No edge node set during edgeBootstrap? This should be impossible.`);
                // Same as the server branch: settle discoveryReady, so callers of getAllNodeIds
                //  fail with this error instead of waiting forever.
                discoveryReady.reject(error);
                throw error;
            }
            let nodes = [edgeNode.host];
            allNodeIds2 = new Set(nodes);
            discoveryReady.resolve();
            nodeBroadcasted.resolve();

            // NOTE: We run into TLS issues (as in, our servers use self signed certs), if we try to talk to just
            //  any node, so... we better just talk to the edge node
            //  - We COULD probably just use some special domain (maybe JUST the machine domain?), with limited wildcard
            //      certs (I think we can only wildcard a single depth anyways), and A records for the machines too...
            //      but... having all traffic route through an edge node is probably better anyways...
            nodeOverrides = nodes;
        });
    }
}
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
/**
 * Deletes the archive entry for `nodeId` and then asks the other known nodes to resync.
 * The broadcast is fire-and-forget; only the archive deletion is awaited.
 */
export async function forceRemoveNode(nodeId: string) {
    const store = archives();
    await store.del(nodeId);
    const reason = `forceRemoveNode ${nodeId}`;
    void tellEveryoneNodesChanges(reason);
}
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
/**
 * Called on shutdown, to completely remove this node from discovery.
 *
 * Sets the module `shutdown` flag (which makes `writeHeartbeat` and the heartbeat audit
 * return early), deletes our own archive entry when running as a server, and finally
 * asks the other nodes to resync without waiting for them.
 */
export async function nodeDiscoveryShutdown() {
    console.log(red(`Shutting down node discovery`));
    shutdown = true;
    const runningAsServer = isServer();
    if (runningAsServer) {
        const ownId = getOwnNodeId();
        await archives().del(ownId);
    }
    void tellEveryoneNodesChanges("nodeDiscoveryShutdown");
}
|
|
586
|
+
/**
 * Asks every other known node to resync its node list (via `resyncNodes`), passing along
 * `reason`. Wrapped in `throttleFunction(1000, ...)`. Does nothing on clients.
 * Errors from individual nodes are ignored.
 */
const tellEveryoneNodesChanges = throttleFunction(1000, function tellEveryoneNodesChanges(reason: string) {
    if (isClient()) return;
    console.log(red(`Telling everyone nodes changed`));
    const peers = Array.from(allNodeIds2).filter(candidate => !isOwnNodeId(candidate));
    for (const peer of peers) {
        ignoreErrors(NodeDiscoveryController.nodes[peer].resyncNodes(reason));
    }
});
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
/**
 * Remote-callable surface for node discovery. An instance is registered with
 * `SocketFunction.register` below, which also attaches the per-method hooks.
 */
class NodeDiscoveryControllerBase {
    /** Liveness probe: answering at all is the signal, so this always resolves to true. */
    async isAlive(): Promise<boolean> {
        return true;
    }

    /** Records `nodeId` in the in-memory node set (no archive access). */
    async addNode(nodeId: string): Promise<boolean> {
        console.log(magenta(`Received addNode`), { nodeId });
        addNodeId(nodeId);
        return true;
    }

    /** Re-reads the node list via `syncArchives`, logging who asked and why. */
    async resyncNodes(reason: string): Promise<void> {
        const { nodeId: callerNodeId } = SocketFunction.getCaller();
        console.log(magenta(`Received resyncNodes from ${callerNodeId}, reason = ${reason}`));
        await syncArchives();
    }

    /** Hash of this node's current view of the node list. */
    async getAllNodesHash(): Promise<string> {
        return getAllNodesHash();
    }

    /** Snapshot of this node's in-memory node list. */
    async getAllNodeIds(): Promise<string[]> {
        const snapshot = Array.from(allNodeIds2);
        return snapshot;
    }

    /** The node id this process is mounted as, as reported by `SocketFunction.mountedNodeId`. */
    async getNodeId() {
        return SocketFunction.mountedNodeId;
    }

    /** Whether this process reports `isNoNetwork()`. */
    async isNoNetwork() {
        return isNoNetwork();
    }
}
|
|
624
|
+
/**
 * Registers the discovery controller with SocketFunction under a fixed, globally unique name.
 *
 * Hook policy per method:
 *  - `isAlive`: no extra configuration.
 *  - `addNode`, `resyncNodes`, `getAllNodesHash`: guarded by `requiresNetworkTrustHook`.
 *  - `getAllNodeIds`, `getNodeId`, `isNoNetwork`: skip client and default hooks.
 */
const NodeDiscoveryController = SocketFunction.register(
    "NodeDiscoveryController-7991037e-fd9e-4085-b1db-52035487e72c",
    new NodeDiscoveryControllerBase(),
    () => {
        // Factories, so that every method gets its own configuration object.
        const trustedOnly = () => ({ hooks: [requiresNetworkTrustHook] });
        // Skip client hooks, so we don't block on authentication (IdentityController), as some of these functions
        //  are needed for authentication to finish!
        const withoutHooks = () => ({ noClientHooks: true, noDefaultHooks: true });
        return {
            isAlive: {},
            addNode: trustedOnly(),
            resyncNodes: trustedOnly(),
            getAllNodesHash: trustedOnly(),
            getAllNodeIds: withoutHooks(),
            getNodeId: withoutHooks(),
            isNoNetwork: withoutHooks(),
        };
    },
    () => ({

    })
);
|