querysub 0.415.0 → 0.416.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "querysub",
|
|
3
|
-
"version": "0.415.0",
|
|
3
|
+
"version": "0.416.0",
|
|
4
4
|
"main": "index.js",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"note1": "note on node-forge fork, see https://github.com/digitalbazaar/forge/issues/744 for details",
|
|
@@ -62,7 +62,7 @@
|
|
|
62
62
|
"pako": "^2.1.0",
|
|
63
63
|
"peggy": "^5.0.6",
|
|
64
64
|
"querysub": "^0.357.0",
|
|
65
|
-
"socket-function": "^1.1.
|
|
65
|
+
"socket-function": "^1.1.19",
|
|
66
66
|
"terser": "^5.31.0",
|
|
67
67
|
"typesafecss": "^0.28.0",
|
|
68
68
|
"yaml": "^2.5.0",
|
|
@@ -2,7 +2,7 @@ import { SocketFunction } from "socket-function/SocketFunction";
|
|
|
2
2
|
import { getArchives } from "../-a-archives/archives";
|
|
3
3
|
import { getDomain, isDevDebugbreak, isNoNetwork, isPublic } from "../config";
|
|
4
4
|
import { measureBlock } from "socket-function/src/profiling/measure";
|
|
5
|
-
import { isNode, sha256Hash, throttleFunction, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
5
|
+
import { isNode, keyByArray, sha256Hash, throttleFunction, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
6
6
|
import { errorToUndefinedSilent, ignoreErrors, logErrors, timeoutToError, timeoutToUndefinedSilent } from "../errors";
|
|
7
7
|
import { ensureWeAreTrusted, requiresNetworkTrustHook } from "../-d-trust/NetworkTrust2";
|
|
8
8
|
import { delay, runInfinitePoll, runInfinitePollCallAtStart } from "socket-function/src/batching";
|
|
@@ -22,6 +22,7 @@ import { EdgeNodeConfig } from "../4-deploy/edgeNodes";
|
|
|
22
22
|
import * as certs from "../-a-auth/certs";
|
|
23
23
|
import { logDisk } from "../diagnostics/logs/diskLogger";
|
|
24
24
|
import { MaybePromise } from "socket-function/src/types";
|
|
25
|
+
import { getPathStr2 } from "../path";
|
|
25
26
|
|
|
26
27
|
let HEARTBEAT_INTERVAL = timeInMinute * 15;
|
|
27
28
|
// Interval which we check other heartbeats
|
|
@@ -196,10 +197,6 @@ function addNodeIdBase(nodeId: string) {
|
|
|
196
197
|
}
|
|
197
198
|
async function setNodeIds(nodeIds: string[]) {
|
|
198
199
|
nodeIds = nodeIds.filter(x => x !== SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE);
|
|
199
|
-
if (isNode()) {
|
|
200
|
-
await Promise.allSettled(nodeIds.map(checkWrongServerNodeId));
|
|
201
|
-
nodeIds = nodeIds.filter(nodeId => !wrongServerNodeIds.has(nodeId));
|
|
202
|
-
}
|
|
203
200
|
|
|
204
201
|
console.info("setNodeIds", { nodeIds });
|
|
205
202
|
let newNodeIds = nodeIds.filter(nodeId => !allNodeIds2.has(nodeId));
|
|
@@ -297,53 +294,37 @@ export async function triggerNodeChange() {
|
|
|
297
294
|
}));
|
|
298
295
|
}
|
|
299
296
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
let
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
if (
|
|
307
|
-
|
|
308
|
-
}
|
|
309
|
-
// Clear it right away, so we can check for it being alive quickly.
|
|
310
|
-
checkWrongServerNodeId.clear(nodeId);
|
|
311
|
-
return;
|
|
312
|
-
}
|
|
313
|
-
if (callFactory) {
|
|
314
|
-
// Not great, but... this should work well enough.
|
|
315
|
-
for (let i = 0; i < 10; i++) {
|
|
316
|
-
if (callFactory.receivedInitializeState) break;
|
|
317
|
-
await delay(500);
|
|
318
|
-
}
|
|
319
|
-
if (!callFactory.receivedInitializeState && SocketFunction.logMessages) {
|
|
320
|
-
console.log(`Did not receive initialize state from ${nodeId}`);
|
|
321
|
-
}
|
|
322
|
-
} else {
|
|
323
|
-
if (SocketFunction.logMessages) {
|
|
324
|
-
console.log(`Did not find call factory for ${nodeId}`);
|
|
325
|
-
}
|
|
297
|
+
|
|
298
|
+
async function clearDeadThreadsFromArchives() {
|
|
299
|
+
let nodes = await archives().find("");
|
|
300
|
+
|
|
301
|
+
function getPortHash(nodeId: string) {
|
|
302
|
+
let obj = decodeNodeId(nodeId);
|
|
303
|
+
if (!obj) return undefined;
|
|
304
|
+
return getPathStr2(obj.machineId, obj.port + "");
|
|
326
305
|
}
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
306
|
+
let byPortHash = keyByArray(nodes, getPortHash);
|
|
307
|
+
for (let [portHash, nodeIds] of Array.from(byPortHash)) {
|
|
308
|
+
if (!portHash) continue;
|
|
309
|
+
let aliveNodeId = "";
|
|
310
|
+
await Promise.all(nodeIds.map(async nodeId => {
|
|
311
|
+
let alive = await errorToUndefinedSilent(NodeDiscoveryController.nodes[nodeId].isAlive());
|
|
312
|
+
if (alive) {
|
|
313
|
+
aliveNodeId = nodeId;
|
|
314
|
+
}
|
|
315
|
+
}));
|
|
316
|
+
if (aliveNodeId) {
|
|
317
|
+
let deadThreads = nodeIds.filter(nodeId => nodeId !== aliveNodeId);
|
|
318
|
+
await Promise.all(deadThreads.map(async deadNodeId => {
|
|
319
|
+
console.log(`Removing dead thread. We contacted a node on the same port and same machine (${aliveNodeId}), which means the port has been reused by another thread, which proves that the old thread has died, as otherwise the new thread would not be able to use it.`);
|
|
320
|
+
await archives().del(deadNodeId);
|
|
321
|
+
}));
|
|
340
322
|
}
|
|
341
323
|
}
|
|
342
324
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
});
|
|
325
|
+
return nodes;
|
|
326
|
+
|
|
327
|
+
}
|
|
347
328
|
|
|
348
329
|
async function syncArchives() {
|
|
349
330
|
if (isServer()) {
|
|
@@ -381,7 +362,8 @@ async function runHeartbeatAuditLoop() {
|
|
|
381
362
|
//console.log(magenta(`Auditing node list`));
|
|
382
363
|
|
|
383
364
|
let deadTime = Date.now() - DEAD_THRESHOLD;
|
|
384
|
-
|
|
365
|
+
|
|
366
|
+
let nodeIds = await clearDeadThreadsFromArchives();
|
|
385
367
|
// We spent the money checking the node list, so we might as well update it
|
|
386
368
|
await setNodeIds(nodeIds);
|
|
387
369
|
|
|
@@ -451,7 +433,6 @@ async function fastMemorySync() {
|
|
|
451
433
|
// with extra nodes. However, if we are missing nodes, we'd prefer to have them quickly, so we should
|
|
452
434
|
// sync now.
|
|
453
435
|
let missingNodes = otherNodes.filter(nodeId => !allNodeIds2.has(nodeId));
|
|
454
|
-
missingNodes = missingNodes.filter(nodeId => !wrongServerNodeIds.has(nodeId));
|
|
455
436
|
if (missingNodes.length > 0) {
|
|
456
437
|
console.log(yellow(`Node list is missing nodes, resyncing node`), { missingNodes, otherNodes });
|
|
457
438
|
await syncArchives();
|
|
@@ -614,6 +595,9 @@ const tellEveryoneNodesChanges = throttleFunction(1000, function tellEveryoneNod
|
|
|
614
595
|
|
|
615
596
|
|
|
616
597
|
class NodeDiscoveryControllerBase {
|
|
598
|
+
public async isAlive() {
|
|
599
|
+
return true;
|
|
600
|
+
}
|
|
617
601
|
public async addNode(nodeId: string) {
|
|
618
602
|
console.log(magenta(`Received addNode`), { nodeId });
|
|
619
603
|
addNodeId(nodeId);
|
|
@@ -642,7 +626,7 @@ const NodeDiscoveryController = SocketFunction.register(
|
|
|
642
626
|
"NodeDiscoveryController-7991037e-fd9e-4085-b1db-52035487e72c",
|
|
643
627
|
new NodeDiscoveryControllerBase(),
|
|
644
628
|
() => ({
|
|
645
|
-
|
|
629
|
+
isAlive: {},
|
|
646
630
|
addNode: { hooks: [requiresNetworkTrustHook] },
|
|
647
631
|
resyncNodes: { hooks: [requiresNetworkTrustHook] },
|
|
648
632
|
getAllNodesHash: { hooks: [requiresNetworkTrustHook] },
|
|
@@ -581,6 +581,11 @@ function createObjectAliveChecker(data: () => any, path: SchemaPath, delay: numb
|
|
|
581
581
|
});
|
|
582
582
|
}
|
|
583
583
|
|
|
584
|
+
let querysub: typeof import("../4-querysub/Querysub") | undefined = undefined;
|
|
585
|
+
setImmediate(async () => {
|
|
586
|
+
querysub = await import("../4-querysub/Querysub");
|
|
587
|
+
});
|
|
588
|
+
|
|
584
589
|
let callIdOverrideFncs: Map<string, Map<string, {
|
|
585
590
|
prefix: string;
|
|
586
591
|
getKey: (...args: unknown[]) => string;
|
|
@@ -591,11 +596,22 @@ export function getCallIdOverride(config: {
|
|
|
591
596
|
callId: string;
|
|
592
597
|
args: unknown[];
|
|
593
598
|
}): string {
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
599
|
+
try {
|
|
600
|
+
if (querysub) {
|
|
601
|
+
if (!querysub.Querysub.isInSyncedCall()) {
|
|
602
|
+
// NOTE: This is wrong and it will result in loading user being used as a key sometimes. However, it should be fine, as if the call id override is wrong, it shouldn't break anything.
|
|
603
|
+
return querysub.Querysub.localRead(() => getCallIdOverride(config));
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
let def = callIdOverrideFncs.get(config.moduleId)?.get(config.functionId);
|
|
607
|
+
if (!def) return config.callId;
|
|
608
|
+
return createRoutingOverrideKey({
|
|
609
|
+
remappedPrefix: def.prefix,
|
|
610
|
+
originalKey: config.callId,
|
|
611
|
+
routeKey: def.getKey(...config.args),
|
|
612
|
+
});
|
|
613
|
+
} catch (e: any) {
|
|
614
|
+
console.warn(`Error getting call id override for ${config.moduleId}.${config.functionId}, falling back to original call id`, { error: e.stack });
|
|
615
|
+
return config.callId;
|
|
616
|
+
}
|
|
601
617
|
}
|
|
@@ -150,6 +150,7 @@ export class SyncTestPage extends qreact.Component {
|
|
|
150
150
|
hasError = true;
|
|
151
151
|
}
|
|
152
152
|
if (hasError) return undefined;
|
|
153
|
+
if (!result?.time) return undefined;
|
|
153
154
|
return (
|
|
154
155
|
<div className={css.hbox(5).hsl(0, 0, 100).pad2(2)} title={JSON.stringify(thread)}>
|
|
155
156
|
<div>{getUniqueThreadName(thread, allThreads)}</div>
|
package/tempnotes.txt
CHANGED
|
@@ -2,60 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
Local CYOA + Local FunctionRunner works
|
|
4
4
|
|
|
5
|
+
1) Deploy
|
|
6
|
+
2) Verify this works
|
|
5
7
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
Hmm... we ask to watch it, but the watch doesn't show up on the other side? Fuck...
|
|
12
|
-
|
|
13
|
-
Hmm, it's still failing in the same way. The function runner just has, like, no watches. It tried to watch I assume the same thing as before. Nothing showed up on the other end.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
So we have the remote watcher, so it looks like we were watching it, but we don't think it's synced, so we clearly didn't get a value back.
|
|
19
|
-
|
|
20
|
-
WATCHER FAILED TO SYNC findFunctionsToCall DID NOT RECEIVE PATH VALUES. This means PathValueServer is not responding to watches, either to specific paths, or for all paths [
|
|
21
|
-
'.,querysubtest._com.,',
|
|
22
|
-
'.,querysubtest._com.,PathFunctionRunner.,'
|
|
23
|
-
] [ '.,querysubtest._com.,PathFunctionRunner.,' ] [Function: findFunctionsToCall]
|
|
24
|
-
Node list is missing nodes, resyncing node {
|
|
25
|
-
missingNodes: [ '716181d6570f7b55.a794fbcf7b104c68.querysubtest.com:34689' ],
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
UGH... poking fixed it. WHICH MEANS, we have to break it again, and get into the bad state again!
|
|
29
|
-
- Ugh...
|
|
30
|
-
|
|
31
|
-
-1) We got into a bad state where the query sub nodes weren't really listening, or they had zombie watchers that hadn't finished synchronizing, but it wasn't complaining about being unable to find any paths. There were no unwatched paths.
|
|
32
|
-
- It fixed when we poked them.
|
|
33
|
-
|
|
34
|
-
0) If it fails after redeploying, disable the server and run everything locally again, as we made a lot of changes, and so it might not even work locally.
|
|
8
|
+
1) verify this warning is no longer in our logs "Node list is missing nodes, resyncing node"
|
|
9
|
+
- We should check the logs. I'm assuming what happened is that at some point some server thought it was the wrong node ID and deleted it?
|
|
10
|
+
- It's generally the same ones, except new ones get added.
|
|
35
11
|
|
|
36
|
-
-1) Get a test script that doesn't even use the function runner working.
|
|
37
12
|
|
|
38
|
-
|
|
39
|
-
- Also, see if we run the local query subserver and local function runner, if those writes can be seen remotely. If it's a remote function runner problem but not a remote path value server problem, we should see the writes.
|
|
13
|
+
-1) A lot of API calls are getting locked up and never finishing. We need to fix this. What the fuck is happening? It happens a lot with audio calls, as we make a lot of audio calls, but we can probably replicate it with non-audio calls. I think it happened with some embeddings, although we did create a lot of embeddings, and it only happened at the end. If it's only an audio thing, that's different, but we still need some kind of timeout. But I don't think it's an audio thing, I think it's not actually starting.
|
|
40
14
|
|
|
41
|
-
-1) Fix the remote servers not being able to send values to our local path value server. I don't know why it wouldn't work... They should have changed the name knowing our local name.
|
|
42
15
|
|
|
43
|
-
1) Use connection page to verify server can talk to our locally hosted server
|
|
44
|
-
1.1) Verify by breaking into our local server that we are receiving values written on the remote server in the local server.
|
|
45
|
-
- This is extremely important. Without this, we can't run a local server. There's all kinds of issues, but our socket function changes should make this so this just works.
|
|
46
16
|
|
|
47
17
|
0) SHARD THE FUNCTION RUNNER!
|
|
48
18
|
- And secondary sharding for backup...
|
|
49
19
|
- First shard locally
|
|
50
20
|
- Test on our sync test page (Otherwise all the writes are going to go to the same function runner anyway, so it's not a good test).
|
|
51
21
|
|
|
52
|
-
-2) Errors are not being automatically grouped.
|
|
53
|
-
- We are getting notifications though, which is weird. The notifications are supposed to be automatically grouping it. We should probably look at its logs and see why.
|
|
54
|
-
|
|
55
|
-
0) We just are constantly "Node list is missing nodes, resyncing node"
|
|
56
|
-
- We should check the logs. I'm assuming what happened is that at some point some server thought it was the wrong node ID and deleted it?
|
|
57
|
-
- It's generally the same ones, except new ones get added.
|
|
58
|
-
|
|
59
22
|
I think even if we run into some occasional issues, we should just power through and try to fix them later. Because I'm sick of working on the framework...
|
|
60
23
|
|
|
61
24
|
MONTHLY SUMMARY!
|