querysub 0.451.0 → 0.453.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +12 -1
- package/bin/join-public.js +1 -0
- package/package.json +1 -1
- package/src/-a-archives/archiveCache.ts +53 -597
- package/src/-g-core-values/NodeCapabilities.ts +29 -28
- package/src/0-path-value-core/archiveLocks/ArchiveLocks2.ts +24 -0
- package/src/0-path-value-core/pathValueCore.ts +1 -1
- package/src/2-proxy/PathValueProxyWatcher.ts +6 -6
- package/src/4-querysub/Querysub.ts +15 -13
- package/src/archiveapps/archiveGCEntry.tsx +1 -0
- package/src/archiveapps/archiveJoinEntry.ts +8 -2
- package/src/deployManager/LaunchTrackingHeader.tsx +65 -0
- package/src/deployManager/machineApplyMainCode.ts +140 -15
- package/src/deployManager/machineSchema.ts +82 -1
- package/src/diagnostics/NodeConnectionsPage.tsx +1 -1
- package/src/diagnostics/NodeViewer.tsx +15 -25
- package/src/diagnostics/debugger/mcp-server.ts +327 -53
- package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts +2 -2
- package/src/diagnostics/logs/IndexedLogs/MCPIndexedLogs.ts +64 -22
- package/src/diagnostics/logs/IndexedLogs/MCPIndexedLogsEntry.ts +32 -1
- package/src/diagnostics/managementPages.tsx +8 -0
- package/src/diagnostics/misc-pages/AuthoritySpecPage.tsx +113 -0
- package/src/diagnostics/pathAuditer.ts +0 -6
- package/test.ts +2 -1
- package/src/misc/getParentProcessId.cs +0 -53
- package/src/misc/getParentProcessId.ts +0 -53
|
@@ -12,6 +12,7 @@ import { delay } from "socket-function/src/batching";
|
|
|
12
12
|
import debugbreak from "debugbreak";
|
|
13
13
|
import { formatTime } from "socket-function/src/formatting/format";
|
|
14
14
|
import type { DebugFunctionShardInfo } from "../3-path-functions/PathFunctionRunner";
|
|
15
|
+
import type { AuthoritySpec } from "../0-path-value-core/PathRouter";
|
|
15
16
|
import { requiresNetworkTrustHook } from "../-d-trust/NetworkTrust2";
|
|
16
17
|
import { isNoNetwork } from "../config";
|
|
17
18
|
import { getDebuggerUrl } from "../diagnostics/listenOnDebugger";
|
|
@@ -94,10 +95,10 @@ export async function getControllerNodeIdList(
|
|
|
94
95
|
await Promise.all(nodeIdsToTest.map(async nodeId => {
|
|
95
96
|
let result = await doesNodeExposeController(nodeId, controller);
|
|
96
97
|
if (result) {
|
|
97
|
-
let
|
|
98
|
+
let metadata = await NodeCapabilitiesController.nodes[nodeId].getMetadata();
|
|
98
99
|
passedNodeIds.set(nodeId, {
|
|
99
100
|
machineId: getMachineId(nodeId),
|
|
100
|
-
entryPoint,
|
|
101
|
+
entryPoint: metadata.entryPoint,
|
|
101
102
|
});
|
|
102
103
|
}
|
|
103
104
|
}));
|
|
@@ -117,38 +118,43 @@ export async function getControllerNodeIdList(
|
|
|
117
118
|
|
|
118
119
|
|
|
119
120
|
/**
 * Reports whether the node identified by `reconnectNodeId` lists `controller`
 * (by its `_classGuid`) among the controllers it exposes.
 *
 * The metadata request is wrapped in `timeoutToUndefinedSilent(10_000, ...)`;
 * if that wrapper produces no metadata, the node is treated as not exposing
 * the controller and `false` is returned.
 */
export async function doesNodeExposeController(reconnectNodeId: string, controller: SocketRegistered<{}>): Promise<boolean> {
    const metadataRequest = NodeCapabilitiesController.nodes[reconnectNodeId].getMetadata();
    const nodeMetadata = await timeoutToUndefinedSilent(10_000, metadataRequest);
    if (!nodeMetadata) return false;
    return nodeMetadata.exposedControllers.includes(controller._classGuid);
}
|
|
127
128
|
|
|
128
129
|
const startupTime = Date.now();
|
|
130
|
+
|
|
131
|
+
/** Snapshot of a node's self-reported state, as returned by `NodeCapabilitiesControllerBase.getMetadata`. */
export type NodeMetadata = {
    /** `process.argv[1]` of the node's process. */
    entryPoint: string;
    /** `Date.now()` captured once when this module was loaded in the node's process. */
    startupTime: number;
    /** `process.pid` of the node's process. */
    processId: number;
    /** Result of `getFunctionRunnerShards()` on the node. */
    functionRunnerShards: DebugFunctionShardInfo[];
    /** Result of `authorityLookup.getOurSpec()` on the node. */
    authoritySpec: AuthoritySpec;
    /** Contents of `SocketFunction.exposedClasses`; `doesNodeExposeController` matches a controller's `_classGuid` against this list. */
    exposedControllers: string[];
    /** Result of `getTrueTimeOffset()` on the node. */
    trueTimeOffset: number;
};
|
|
140
|
+
|
|
129
141
|
class NodeCapabilitiesControllerBase {
|
|
130
|
-
public async
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
142
|
+
/**
 * Collects this process's self-description into a single `NodeMetadata`
 * object, so callers need one call instead of one per field.
 */
public async getMetadata(): Promise<NodeMetadata> {
    // AuthorityLookup is loaded at call time rather than through a top-level import.
    const authorityModule = await import("../0-path-value-core/AuthorityLookup");
    const metadata: NodeMetadata = {
        entryPoint: process.argv[1],
        startupTime,
        processId: process.pid,
        functionRunnerShards: getFunctionRunnerShards(),
        authoritySpec: authorityModule.authorityLookup.getOurSpec(),
        exposedControllers: Array.from(SocketFunction.exposedClasses),
        trueTimeOffset: getTrueTimeOffset(),
    };
    return metadata;
}
|
|
154
|
+
|
|
139
155
|
/** Returns `process.memoryUsage()` for the process hosting this controller. */
public async getMemoryUsage() {
    return process.memoryUsage();
}
|
|
142
|
-
public async getProcessId() {
|
|
143
|
-
return process.pid;
|
|
144
|
-
}
|
|
145
|
-
public async getFunctionRunnerShards() {
|
|
146
|
-
return getFunctionRunnerShards();
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
public async getTrueTimeOffset() {
|
|
150
|
-
return getTrueTimeOffset();
|
|
151
|
-
}
|
|
152
158
|
|
|
153
159
|
public async getInspectURL() {
|
|
154
160
|
return await getDebuggerUrl();
|
|
@@ -188,13 +194,8 @@ export const NodeCapabilitiesController = SocketFunction.register(
|
|
|
188
194
|
"NodeCapabilitiesController-399b7067-75c3-4d92-8be3-8470bde32d3c",
|
|
189
195
|
new NodeCapabilitiesControllerBase(),
|
|
190
196
|
() => ({
|
|
191
|
-
|
|
192
|
-
getEntryPoint: {},
|
|
193
|
-
getStartupTime: {},
|
|
197
|
+
getMetadata: {},
|
|
194
198
|
getMemoryUsage: {},
|
|
195
|
-
getProcessId: {},
|
|
196
|
-
getFunctionRunnerShards: {},
|
|
197
|
-
getTrueTimeOffset: {},
|
|
198
199
|
getInspectURL: { hooks: [requiresNetworkTrustHook] },
|
|
199
200
|
exposeExternalDebugPortOnce: { hooks: [requiresNetworkTrustHook] },
|
|
200
201
|
}),
|
|
@@ -245,7 +245,31 @@ export function createArchiveLocker2(config: {
|
|
|
245
245
|
newTransaction.ops.push({ type: "delete", key: obj.file });
|
|
246
246
|
}
|
|
247
247
|
}
|
|
248
|
+
// Per-op trace so a "written too slowly" crash on a specific file can be
|
|
249
|
+
// correlated against the exact transaction that touched it. The aggregate
|
|
250
|
+
// "Joining N => M" log doesn't name the files. We log before the attempt and
|
|
251
|
+
// again after with the resulting status (including "rejected").
|
|
252
|
+
const describeOp = (op: typeof newTransaction.ops[number]): string => {
|
|
253
|
+
let detail = "";
|
|
254
|
+
if (op.key.endsWith(".data") || op.key.endsWith(".data.locked")) {
|
|
255
|
+
try {
|
|
256
|
+
detail = ` source=${pathValueArchives.decodeDataPath(op.key).sourceType}`;
|
|
257
|
+
} catch (e) {
|
|
258
|
+
detail = ` decodeError=${(e as Error).message ?? e}`;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
return `${op.type} ${op.key}${detail}`;
|
|
262
|
+
};
|
|
263
|
+
for (let op of newTransaction.ops) {
|
|
264
|
+
console.info(`Transaction attempt: ${describeOp(op)}`);
|
|
265
|
+
}
|
|
266
|
+
|
|
248
267
|
let status = await locker.addTransaction(newTransaction);
|
|
268
|
+
|
|
269
|
+
for (let op of newTransaction.ops) {
|
|
270
|
+
console.info(`Transaction ${status}: ${describeOp(op)}`);
|
|
271
|
+
}
|
|
272
|
+
|
|
249
273
|
if (status === "accepted") {
|
|
250
274
|
let newFiles = new Set<string>();
|
|
251
275
|
for (let file of files) {
|
|
@@ -68,7 +68,7 @@ export const MAX_CHANGE_AGE = MAX_ACCEPTED_CHANGE_AGE * 2;
|
|
|
68
68
|
/** Extra time we keep clientside prediction rejections for, to give us time to receive the actual values. */
|
|
69
69
|
export const CLIENTSIDE_PREDICT_LEEWAY = 500;
|
|
70
70
|
|
|
71
|
-
/** Any PathValues which take longer than this to
|
|
71
|
+
/** Any PathValues which take longer than this to write should be rejected, so... we have
|
|
72
72
|
* to write well before this time.
|
|
73
73
|
* - This has to be at least MAX_CHANGE_AGE * 4.5 + the time to serialize and
|
|
74
74
|
* send our data to remote storage.
|
|
@@ -1472,8 +1472,8 @@ export class PathValueProxyWatcher {
|
|
|
1472
1472
|
let notWatchingUnsyncedParent = reallyUnsyncedParentAccesses.filter(x => !remoteWatcher.debugIsWatchingPath(x));
|
|
1473
1473
|
if (notWatchingUnsynced.length !== 0 || notWatchingUnsyncedParent.length !== 0) {
|
|
1474
1474
|
console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("NOT REMOTE WATCHING REQUIRED PATHS")}. This means our sync or unsync (likely unsync) logic is broken, in remoteWatcher/clientWatcher. OR, there were no read nodes when we tried to sync (we don't handle missing read nodes correctly at the moment)`), { notWatchingUnsynced, notWatchingUnsyncedParent }, watcher.options.watchFunction);
|
|
1475
|
-
debugbreak(2);
|
|
1476
|
-
debugger;
|
|
1475
|
+
// debugbreak(2);
|
|
1476
|
+
// debugger;
|
|
1477
1477
|
} else {
|
|
1478
1478
|
console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("DID NOT RECEIVE PATH VALUES")}. This means PathValueServer is not responding to watches, either to specific paths, or for all paths`), { reallyUnsyncedAccesses, reallyUnsyncedParentAccesses }, watcher.options.watchFunction);
|
|
1479
1479
|
// debugbreak(2);
|
|
@@ -1481,12 +1481,12 @@ export class PathValueProxyWatcher {
|
|
|
1481
1481
|
}
|
|
1482
1482
|
} else if (watcher.lastSpecialPromiseUnsynced) {
|
|
1483
1483
|
console.warn((`${yellow("WATCHER SLOW TO SYNC")} ${watcher.debugName} ${magenta("DEPENDENT PROMISE NEVER RESOLVED")}. This promise might resolve, but it probably won't. Slow promises should be detached from the watcher system and use multiple watchers/writes, instead of blocking on a promise.`), watcher.lastSpecialPromiseUnsyncedReason, watcher.options.watchFunction);
|
|
1484
|
-
debugbreak(2);
|
|
1485
|
-
debugger;
|
|
1484
|
+
// debugbreak(2);
|
|
1485
|
+
// debugger;
|
|
1486
1486
|
} else {
|
|
1487
1487
|
console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("DID NOT TRIGGER WATCHER")}. This means either ProxyWatcher is broken (and isn't triggering when it should, or isn't watching when it should), or ClientWatcher/PathWatcher are broken and are not properly informing callers of watchers.`), { lastUnsyncedAccesses: watcher.lastUnsyncedAccesses, lastUnsyncedParentAccesses: watcher.lastUnsyncedParentAccesses }, watcher.options.watchFunction);
|
|
1488
|
-
debugbreak(2);
|
|
1489
|
-
debugger;
|
|
1488
|
+
// debugbreak(2);
|
|
1489
|
+
// debugger;
|
|
1490
1490
|
}
|
|
1491
1491
|
}, 60000);
|
|
1492
1492
|
}
|
|
@@ -44,7 +44,7 @@ import { waitForFirstTimeSync } from "socket-function/time/trueTimeShim";
|
|
|
44
44
|
import { logMeasureTable, measureBlock, measureFnc, measureWrap, startMeasure } from "socket-function/src/profiling/measure";
|
|
45
45
|
import { delay } from "socket-function/src/batching";
|
|
46
46
|
import { MaybePromise } from "socket-function/src/types";
|
|
47
|
-
import { devDebugbreak, getDomain, isBootstrapOnly, isDynamicallyLoading, isPublic, noSyncing } from "../config";
|
|
47
|
+
import { devDebugbreak, getDomain, isBootstrapOnly, isDynamicallyLoading, isPublic, isRecovery, noSyncing } from "../config";
|
|
48
48
|
import { Schema2, Schema2T, t } from "../2-proxy/schema2";
|
|
49
49
|
import { CALL_PERMISSIONS_KEY } from "./permissionsShared";
|
|
50
50
|
import yargs, { check } from "yargs";
|
|
@@ -1035,19 +1035,21 @@ export class Querysub {
|
|
|
1035
1035
|
return module;
|
|
1036
1036
|
}
|
|
1037
1037
|
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
await isAllowedToSeeSource(configObj?.signedIdentity)
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1038
|
+
if (!isRecovery()) {
|
|
1039
|
+
RequireController.addMapGetModules(async (result, args) => {
|
|
1040
|
+
let configObj = args[2] as { signedIdentity: SignedIdentity | undefined } | undefined;
|
|
1041
|
+
if (!await isAllowedToSeeSource(configObj?.signedIdentity)) {
|
|
1042
|
+
await isAllowedToSeeSource(configObj?.signedIdentity);
|
|
1043
|
+
//console.log(red(`Not allowed to see source`));
|
|
1044
|
+
for (let [key, value] of Object.entries(result.modules)) {
|
|
1045
|
+
result.modules[key] = stripSource(value);
|
|
1046
|
+
}
|
|
1047
|
+
} else {
|
|
1048
|
+
//console.log(green(`Allowed to see source`));
|
|
1045
1049
|
}
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
return result;
|
|
1050
|
-
});
|
|
1050
|
+
return result;
|
|
1051
|
+
});
|
|
1052
|
+
}
|
|
1051
1053
|
}
|
|
1052
1054
|
|
|
1053
1055
|
public static async hostService(name: string, port = 0) {
|
|
@@ -2,7 +2,7 @@ import "../inject";
|
|
|
2
2
|
|
|
3
3
|
import { logErrors } from "../errors";
|
|
4
4
|
import { PathValueArchives, pathValueArchives } from "../0-path-value-core/pathValueArchives";
|
|
5
|
-
import { PathValue, VALUE_GC_THRESHOLD } from "../0-path-value-core/pathValueCore";
|
|
5
|
+
import { ARCHIVE_FLUSH_LIMIT, PathValue, VALUE_GC_THRESHOLD } from "../0-path-value-core/pathValueCore";
|
|
6
6
|
import { runInfinitePollCallAtStart } from "socket-function/src/batching";
|
|
7
7
|
import { measureBlock } from "socket-function/src/profiling/measure";
|
|
8
8
|
import { pathValueSerializer } from "../-h-path-value-serialize/PathValueSerializer";
|
|
@@ -41,7 +41,12 @@ async function runGenesisJoinIteration(config?: { force?: boolean }) {
|
|
|
41
41
|
valueFiles = valueFiles.filter(x => {
|
|
42
42
|
let obj = pathValueArchives.decodeDataPath(x.file);
|
|
43
43
|
if (!obj.minTime) return false;
|
|
44
|
-
|
|
44
|
+
if (obj.sourceType === "genesis") {
|
|
45
|
+
// Has to be old enough
|
|
46
|
+
return Date.now() - obj.time < ARCHIVE_FLUSH_LIMIT;
|
|
47
|
+
}
|
|
48
|
+
// Anything else can be merged immediately.
|
|
49
|
+
return true;
|
|
45
50
|
});
|
|
46
51
|
}
|
|
47
52
|
let withinTimeRangeCount = valueFiles.length;
|
|
@@ -123,6 +128,7 @@ async function main() {
|
|
|
123
128
|
await Querysub.hostService("join");
|
|
124
129
|
|
|
125
130
|
if (yargObj.watch) {
|
|
131
|
+
console.log("Running in watch mode.");
|
|
126
132
|
await runInfinitePollCallAtStart(VALUE_GC_THRESHOLD * 0.8, runGenesisJoinIteration);
|
|
127
133
|
} else {
|
|
128
134
|
try {
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
module.allowclient = true;
|
|
2
|
+
|
|
3
|
+
import { qreact } from "../4-dom/qreact";
|
|
4
|
+
import { css } from "typesafecss";
|
|
5
|
+
import { getBrowserUrlNode } from "../-f-node-discovery/NodeDiscovery";
|
|
6
|
+
import { MachineServiceController } from "./machineSchema";
|
|
7
|
+
|
|
8
|
+
const SINCE_DAYS = 2;
|
|
9
|
+
const CRASH_HUE = 0;
|
|
10
|
+
const OTHER_HUE = 210;
|
|
11
|
+
|
|
12
|
+
/**
 * Compact header widget summarizing service launches from the last
 * `SINCE_DAYS` days: total "crashed" launches vs. all other launches, and,
 * when the top-ranked service has at least one crash, that service's own
 * counts in parentheses. The tooltip lists the totals followed by every
 * service's counts. Renders nothing while `getRecentLaunches` has no value or
 * returns an empty list.
 */
export class LaunchTrackingHeader extends qreact.Component {
    render() {
        const launches = MachineServiceController(getBrowserUrlNode()).getRecentLaunches(SINCE_DAYS);
        if (!launches || launches.length === 0) return undefined;

        // Tally launches overall and per service key in a single pass.
        const totals = { crashes: 0, other: 0 };
        const byService = new Map<string, { crashes: number; other: number }>();
        for (const launch of launches) {
            const bucket = launch.reason === "crashed" ? "crashes" : "other";
            totals[bucket]++;
            let counts = byService.get(launch.serviceKey);
            if (!counts) {
                counts = { crashes: 0, other: 0 };
                byService.set(launch.serviceKey, counts);
            }
            counts[bucket]++;
        }

        // Most crashes first; ties are broken by total launch count.
        const ranked = Array.from(byService, ([serviceKey, counts]) => ({
            serviceKey,
            crashes: counts.crashes,
            other: counts.other,
        }));
        ranked.sort((a, b) => (b.crashes - a.crashes) || ((b.crashes + b.other) - (a.crashes + a.other)));
        const worst = ranked[0];

        const title = [
            `Launches in last ${SINCE_DAYS} days: ${totals.crashes} crashed, ${totals.other} other`,
            ...ranked.map(r => `${r.serviceKey}: ${r.crashes} crashed, ${r.other} other`),
        ].join("\n");

        return <div title={title} className={css.hbox(6).colorhsl(0, 0, 20)}>
            <span>🚀</span>
            <span>
                <span className={css.colorhsl(CRASH_HUE, 70, 35).boldStyle}>{totals.crashes}</span>
                <span className={css.colorhsl(0, 0, 55)}>|</span>
                <span className={css.colorhsl(OTHER_HUE, 65, 35).boldStyle}>{totals.other}</span>
            </span>
            {worst && worst.crashes > 0 &&
                <span className={css.colorhsl(0, 0, 40)}>
                    (<span className={css.colorhsl(CRASH_HUE, 70, 35)}>{worst.crashes}</span>
                    <span className={css.colorhsl(0, 0, 55)}>|</span>
                    <span className={css.colorhsl(OTHER_HUE, 65, 35)}>{worst.other}</span>)
                </span>
            }
        </div>;
    }
}
|
|
@@ -4,7 +4,7 @@ import { measureWrap } from "socket-function/src/profiling/measure";
|
|
|
4
4
|
import { getOwnMachineId } from "../-a-auth/certs";
|
|
5
5
|
import { forceRemoveNode, getOurNodeId, getOurNodeIdAssert } from "../-f-node-discovery/NodeDiscovery";
|
|
6
6
|
import { Querysub } from "../4-querysub/QuerysubController";
|
|
7
|
-
import { MACHINE_RESYNC_INTERVAL, MachineServiceControllerBase, MachineInfo, ServiceConfig, serviceConfigs, SERVICE_FOLDER, machineInfos, SERVICE_NODE_FILE_NAME, getEffectiveServiceConfigs } from "./machineSchema";
|
|
7
|
+
import { MACHINE_RESYNC_INTERVAL, MachineServiceControllerBase, MachineInfo, ServiceConfig, serviceConfigs, SERVICE_FOLDER, machineInfos, SERVICE_NODE_FILE_NAME, getEffectiveServiceConfigs, recordLaunch } from "./machineSchema";
|
|
8
8
|
import { runPromise } from "../functional/runCommand";
|
|
9
9
|
import { getExternalIP } from "socket-function/src/networking";
|
|
10
10
|
import { errorToUndefined, errorToUndefinedSilent } from "../errors";
|
|
@@ -183,25 +183,21 @@ export async function streamScreenOutput(config: {
|
|
|
183
183
|
const pipeFile = `${root}${screenName}/pipe.txt`;
|
|
184
184
|
const tailScript = `${root}${screenName}/smart_tail.sh`;
|
|
185
185
|
|
|
186
|
-
// Create a smart tail script that handles file truncation
|
|
186
|
+
// Create a smart tail script that handles file truncation. It does
|
|
187
|
+
// NOT emit the initial backlog - that is read and sent directly by
|
|
188
|
+
// this process (as a single message) below. The script is given a
|
|
189
|
+
// start byte offset and only emits content appended after it.
|
|
187
190
|
await fs.promises.writeFile(tailScript, `#!/bin/bash
|
|
188
191
|
PIPE_FILE="$1"
|
|
192
|
+
START_POS="$2"
|
|
189
193
|
|
|
190
|
-
#
|
|
191
|
-
CURRENT_POS
|
|
194
|
+
# Position tracking starts at the offset we already read directly.
|
|
195
|
+
CURRENT_POS=$START_POS
|
|
196
|
+
CURRENT_SIZE=$START_POS
|
|
192
197
|
LAST_MTIME=""
|
|
193
198
|
|
|
194
|
-
# Read initial content and get file size
|
|
195
199
|
if [ -f "$PIPE_FILE" ]; then
|
|
196
|
-
CURRENT_SIZE=$(stat -c%s "$PIPE_FILE" 2>/dev/null || wc -c < "$PIPE_FILE")
|
|
197
200
|
LAST_MTIME=$(stat -c%Y "$PIPE_FILE" 2>/dev/null || stat -f%m "$PIPE_FILE" 2>/dev/null || echo "0")
|
|
198
|
-
# Output initial content like tail would
|
|
199
|
-
if [ $CURRENT_SIZE -gt 0 ]; then
|
|
200
|
-
cat "$PIPE_FILE"
|
|
201
|
-
CURRENT_POS=$CURRENT_SIZE
|
|
202
|
-
fi
|
|
203
|
-
else
|
|
204
|
-
CURRENT_SIZE=0
|
|
205
201
|
fi
|
|
206
202
|
|
|
207
203
|
# Poll for file changes every 250ms
|
|
@@ -237,8 +233,21 @@ done`);
|
|
|
237
233
|
|
|
238
234
|
await runPromise(`chmod +x ${tailScript}`);
|
|
239
235
|
|
|
240
|
-
//
|
|
241
|
-
|
|
236
|
+
// Read the existing backlog ourselves and deliver it as a single
|
|
237
|
+
// onData call. If we let the tail script cat it, the stream chunks
|
|
238
|
+
// it and runInSerial dribbles it out one round-trip at a time.
|
|
239
|
+
let initialContent = "";
|
|
240
|
+
let initialByteSize = 0;
|
|
241
|
+
try {
|
|
242
|
+
initialContent = await fs.promises.readFile(pipeFile, "utf8");
|
|
243
|
+
initialByteSize = Buffer.byteLength(initialContent, "utf8");
|
|
244
|
+
} catch {
|
|
245
|
+
// pipe.txt may not exist yet; the tail script will pick it up.
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// The tail script emits only content appended after initialByteSize,
|
|
249
|
+
// so the backlog we just read is never sent twice.
|
|
250
|
+
childProcess = spawn("bash", [tailScript, pipeFile, String(initialByteSize)], {
|
|
242
251
|
stdio: "pipe",
|
|
243
252
|
});
|
|
244
253
|
|
|
@@ -262,6 +271,13 @@ done`);
|
|
|
262
271
|
started.reject(err);
|
|
263
272
|
});
|
|
264
273
|
|
|
274
|
+
if (initialContent) {
|
|
275
|
+
// Queued synchronously here, before any stdout 'data' event can
|
|
276
|
+
// fire, so the backlog is always delivered ahead of new output.
|
|
277
|
+
started.resolve();
|
|
278
|
+
void onDataWrapped(initialContent);
|
|
279
|
+
}
|
|
280
|
+
|
|
265
281
|
await started.promise;
|
|
266
282
|
} catch (e) {
|
|
267
283
|
void stop();
|
|
@@ -725,6 +741,21 @@ const resyncServicesBase = runInSerial(measureWrap(async function resyncServices
|
|
|
725
741
|
|
|
726
742
|
await fs.promises.writeFile(parameterPath, newParametersString);
|
|
727
743
|
|
|
744
|
+
let launchReason: "crashed" | "update";
|
|
745
|
+
if (!sameParameters) {
|
|
746
|
+
launchReason = "update";
|
|
747
|
+
} else {
|
|
748
|
+
launchReason = "crashed";
|
|
749
|
+
}
|
|
750
|
+
void recordLaunch({
|
|
751
|
+
serviceId: config.serviceId,
|
|
752
|
+
serviceKey: config.parameters.key,
|
|
753
|
+
screenName,
|
|
754
|
+
machineId,
|
|
755
|
+
reason: launchReason,
|
|
756
|
+
time: Date.now(),
|
|
757
|
+
});
|
|
758
|
+
|
|
728
759
|
await runScreenCommand({
|
|
729
760
|
screenName,
|
|
730
761
|
command: config.parameters.command,
|
|
@@ -839,6 +870,92 @@ async function getPPID(pid: string) {
|
|
|
839
870
|
}
|
|
840
871
|
}
|
|
841
872
|
|
|
873
|
+
// Node prints this when the process has exited but the V8 inspector is still
|
|
874
|
+
// holding it open waiting for a debugger client to detach. If a debugger client
|
|
875
|
+
// drops uncleanly the process can sit on this forever, holding the screen and
|
|
876
|
+
// preventing the supervisor from spawning a fresh instance. The watcher below
|
|
877
|
+
// polls each screen's pipe.txt for this phrase and sends Ctrl+C to break it out.
|
|
878
|
+
const DEBUGGER_DISCONNECT_MESSAGE = "Waiting for the debugger to disconnect...";
|
|
879
|
+
|
|
880
|
+
// To avoid false positives from app code that happens to log a string
|
|
881
|
+
// containing the phrase, the wedge is only reported when:
|
|
882
|
+
// 1. the phrase is on its own line (start-of-string or after a newline), AND
|
|
883
|
+
// 2. nothing else has been logged after it (only trailing whitespace/EOF).
|
|
884
|
+
// In a real wedge, this line is the very last thing Node writes before going
|
|
885
|
+
// silent — so any later output rules out the wedge interpretation.
|
|
886
|
+
const DEBUGGER_DISCONNECT_TAIL_PATTERN = /(?:^|\r?\n)Waiting for the debugger to disconnect\.\.\.\s*$/;
|
|
887
|
+
|
|
888
|
+
// How often the wedge watcher scans every service's pipe.txt.
|
|
889
|
+
const DEBUGGER_WEDGE_POLL_INTERVAL = timeInSecond * 15;
|
|
890
|
+
// How much of the tail of pipe.txt to read on each scan. The phrase is short
|
|
891
|
+
// and appears near the very end of the log when a process is stuck, so a few
|
|
892
|
+
// KB is plenty and keeps scans cheap.
|
|
893
|
+
const DEBUGGER_WEDGE_TAIL_BYTES = 8 * 1024;
|
|
894
|
+
// Suppress repeated Ctrl+C bursts to the same screen while it tears down; if
|
|
895
|
+
// the first signal didn't break the wait, give it this long before we try again.
|
|
896
|
+
const DEBUGGER_WEDGE_RESIGNAL_DELAY = timeInMinute;
|
|
897
|
+
// After a first match, wait this long and re-check before signaling. If the
|
|
898
|
+
// process is still alive and just happened to log a message ending in the
|
|
899
|
+
// phrase, more output will appear in the meantime and the second check fails.
|
|
900
|
+
const DEBUGGER_WEDGE_RECHECK_DELAY = timeInSecond * 3;
|
|
901
|
+
|
|
902
|
+
// screenName -> last time we sent Ctrl+C because of a debugger wedge.
|
|
903
|
+
const lastDebuggerWedgeSignal = new Map<string, number>();
|
|
904
|
+
|
|
905
|
+
/**
 * Reads at most the last `maxBytes` bytes of `pipeFile` and returns them
 * decoded as UTF-8.
 *
 * This is a best-effort read: if the file cannot be opened, stat'ed or read
 * (for example because it does not exist yet), or if it is empty, the result
 * is the empty string rather than an error.
 *
 * @param pipeFile Path of the file to read.
 * @param maxBytes Upper bound on how many trailing bytes are read.
 * @returns The decoded tail of the file, or "" when nothing could be read.
 */
async function readPipeFileTail(pipeFile: string, maxBytes: number): Promise<string> {
    let handle: fs.promises.FileHandle | undefined;
    try {
        handle = await fs.promises.open(pipeFile, "r");
        const { size } = await handle.stat();
        const start = Math.max(0, size - maxBytes);
        const length = size - start;
        if (length <= 0) return "";
        const buf = Buffer.alloc(length);
        // The file can shrink between stat() and read() (e.g. the log gets
        // truncated), in which case fewer bytes than requested come back.
        // Decode only what was actually read so the zero-filled remainder of
        // the buffer does not end up in the result as NUL characters.
        const { bytesRead } = await handle.read(buf, 0, length, start);
        return buf.toString("utf8", 0, bytesRead);
    } catch {
        // File doesn't exist yet, or got truncated mid-read — treat as empty.
        return "";
    } finally {
        if (handle) {
            try {
                await handle.close();
            } catch {
                // Closing is best-effort; there is nothing useful to do on failure.
            }
        }
    }
}
|
|
929
|
+
|
|
930
|
+
/**
 * Scans every screen reported by `getScreenState(false)` and sends Ctrl+C to
 * those whose pipe.txt ends with the "Waiting for the debugger to
 * disconnect..." line.
 *
 * A screen is only signaled when the tail pattern matches on two reads taken
 * `DEBUGGER_WEDGE_RECHECK_DELAY` apart, and when it has not already been
 * signaled within the last `DEBUGGER_WEDGE_RESIGNAL_DELAY`. Failures to send
 * the keys are logged as warnings and do not stop the scan.
 */
async function unwedgeStuckDebuggerScreens(): Promise<void> {
    const tmuxPrefix = getTmuxPrefix();
    const screenStates = await getScreenState(false);

    // True when the tail of the given pipe file currently ends with the disconnect phrase.
    const tailLooksWedged = async (pipeFile: string): Promise<boolean> => {
        const tail = await readPipeFileTail(pipeFile, DEBUGGER_WEDGE_TAIL_BYTES);
        return DEBUGGER_DISCONNECT_TAIL_PATTERN.test(tail);
    };

    for (const screen of screenStates) {
        const screenName = screen.screenName;
        const pipeFile = `${os.homedir()}/${SERVICE_FOLDER}${screenName}/pipe.txt`;

        if (!(await tailLooksWedged(pipeFile))) continue;

        // Re-check after a short pause: a process that is still alive writes
        // more output in the meantime, so the phrase is no longer at the end
        // of the file and the second check fails.
        await delay(DEBUGGER_WEDGE_RECHECK_DELAY);
        if (!(await tailLooksWedged(pipeFile))) continue;

        // Throttle: don't signal the same screen again too soon after the last attempt.
        const previousSignal = lastDebuggerWedgeSignal.get(screenName) ?? 0;
        if (Date.now() - previousSignal < DEBUGGER_WEDGE_RESIGNAL_DELAY) continue;
        lastDebuggerWedgeSignal.set(screenName, Date.now());

        console.warn(red(`Detected stuck "${DEBUGGER_DISCONNECT_MESSAGE}" in ${screenName} (confirmed across two checks ${DEBUGGER_WEDGE_RECHECK_DELAY}ms apart); sending Ctrl+C to unblock it.`));
        try {
            await runPromise(`${tmuxPrefix}tmux send-keys -t ${screenName} 'C-c' Enter`);
        } catch (e: any) {
            console.warn(`Failed to send Ctrl+C to ${screenName}: ${e.stack ?? e}`);
        }
    }
}
|
|
958
|
+
|
|
842
959
|
export async function machineApplyMain() {
|
|
843
960
|
let parentPID = process.argv[2];
|
|
844
961
|
// Wait for the console to get shimmed
|
|
@@ -867,6 +984,14 @@ export async function machineApplyMain() {
|
|
|
867
984
|
await Querysub.hostService("machine-apply");
|
|
868
985
|
onServiceConfigChange(resyncServices);
|
|
869
986
|
|
|
987
|
+
runInfinitePoll(DEBUGGER_WEDGE_POLL_INTERVAL, async () => {
|
|
988
|
+
try {
|
|
989
|
+
await unwedgeStuckDebuggerScreens();
|
|
990
|
+
} catch (e: any) {
|
|
991
|
+
console.error(`Error in debugger-wedge watcher: ${e.stack ?? e}`);
|
|
992
|
+
}
|
|
993
|
+
});
|
|
994
|
+
|
|
870
995
|
runInfinitePoll(timeInMinute * 3, async () => {
|
|
871
996
|
//console.log(magenta(`Quick outdated check at ${new Date().toISOString()}`));
|
|
872
997
|
// console.log(magenta("Likely outdated, resyncing now"));
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isNodeTrue, list, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
1
|
+
import { isNodeTrue, list, timeInDay, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
2
2
|
import { nestArchives } from "../-a-archives/archives";
|
|
3
3
|
import { getArchivesBackblaze } from "../-a-archives/archivesBackBlaze";
|
|
4
4
|
import { getDomain } from "../config";
|
|
@@ -103,6 +103,69 @@ export const machineInfos = archiveJSONT<MachineInfo>(() => nestArchives("machin
|
|
|
103
103
|
export const serviceConfigs = archiveJSONT<ServiceConfig>(() => nestArchives("machines/service-configs/", getArchivesBackblaze(getDomain())));
|
|
104
104
|
export const machineConfigs = archiveJSONT<MachineConfig>(() => nestArchives("machines/machine-configs/", getArchivesBackblaze(getDomain())));
|
|
105
105
|
|
|
106
|
+
/** One service launch event; `recordLaunch` stores it as JSON in the launches archive. */
export type LaunchRecord = {
    serviceId: string;
    /** Also embedded in the archive key, after the time and reason. */
    serviceKey: string;
    screenName: string;
    machineId: string;
    /** Also embedded in the archive key. */
    reason: "crashed" | "update";
    /** Used for the archive key's UTC day folder and its leading timestamp; callers in this file pass `Date.now()`. */
    time: number;
};
|
|
114
|
+
|
|
115
|
+
/** Launch information recovered from an archive key alone by `parseLaunchKey`, without reading the stored record. */
export type LaunchSummary = {
    /** Timestamp parsed from the key; `parseLaunchKey` substitutes `Date.now()` when it cannot be parsed. */
    time: number;
    /** Reason segment of the key. Typed as `string` because unexpected values are passed through (with a warning). */
    reason: string;
    /** Everything after the reason segment of the key; empty when the key has no such segment. */
    serviceKey: string;
    /** The archive key this summary was parsed from, unmodified. */
    key: string;
};
|
|
121
|
+
|
|
122
|
+
const launches = lazy(() => nestArchives("machines/launches/", getArchivesBackblaze(getDomain())));
|
|
123
|
+
|
|
124
|
+
/**
 * Formats a millisecond timestamp as its UTC calendar day, `YYYY-MM-DD`,
 * with month and day zero-padded to two digits. Used as the per-day folder
 * name for launch records.
 */
function formatLaunchDay(time: number): string {
    const when = new Date(time);
    const twoDigits = (value: number): string => String(value).padStart(2, "0");
    return [
        when.getUTCFullYear(),
        twoDigits(when.getUTCMonth() + 1),
        twoDigits(when.getUTCDate()),
    ].join("-");
}
|
|
131
|
+
|
|
132
|
+
/**
 * Persists a launch record to the launches archive as JSON.
 *
 * The archive key is `<UTC day>/<time>_<reason>_<serviceKey>`, so the time,
 * reason and service key can later be recovered from the key alone.
 */
export async function recordLaunch(record: LaunchRecord) {
    const { time, reason, serviceKey } = record;
    const archiveKey = `${formatLaunchDay(time)}/${time}_${reason}_${serviceKey}`;
    const archive = launches();
    await archive.set(archiveKey, Buffer.from(JSON.stringify(record)));
}
|
|
137
|
+
|
|
138
|
+
/**
 * Recovers a `LaunchSummary` from a launch archive key of the form
 * `<day>/<time>_<reason>_<serviceKey>` (the `<day>/` prefix is optional).
 *
 * Parsing is lenient and never throws:
 * - Only the first two underscores after the optional `<day>/` prefix are
 *   treated as separators, so `serviceKey` may itself contain underscores.
 * - A missing or non-numeric time segment is logged and replaced by
 *   `Date.now()`.
 * - A reason other than "crashed" / "update" is logged but passed through.
 *
 * @param key Archive key to parse.
 * @returns The parsed summary; `key` is echoed back unchanged.
 */
function parseLaunchKey(key: string): LaunchSummary {
    // Drop the "<day>/" folder prefix when present.
    const slash = key.indexOf("/");
    const rest = slash >= 0 ? key.slice(slash + 1) : key;

    // "<time>_..." — without any underscore there is no time segment at all.
    let timeStr = "";
    let afterTime = rest;
    const firstUnderscore = rest.indexOf("_");
    if (firstUnderscore >= 0) {
        timeStr = rest.slice(0, firstUnderscore);
        afterTime = rest.slice(firstUnderscore + 1);
    }

    // "<reason>_<serviceKey>" — without a second underscore everything is the reason.
    let reason = afterTime;
    let serviceKey = "";
    const secondUnderscore = afterTime.indexOf("_");
    if (secondUnderscore >= 0) {
        reason = afterTime.slice(0, secondUnderscore);
        serviceKey = afterTime.slice(secondUnderscore + 1);
    }

    let time = Number(timeStr);
    // Number("") (and whitespace-only input) is 0, which is finite, so an
    // absent time segment has to be rejected explicitly; otherwise it would
    // silently be reported as a launch at the Unix epoch.
    if (timeStr.trim() === "" || !Number.isFinite(time)) {
        console.warn(`Unparseable launch key (bad time): ${key}`);
        time = Date.now();
    }
    if (reason !== "crashed" && reason !== "update") {
        console.warn(`Launch key has unexpected reason "${reason}": ${key}`);
    }
    return { time, reason, serviceKey, key };
}
|
|
168
|
+
|
|
106
169
|
export function doRegisterNodeForMachineCleanup() {
|
|
107
170
|
if (isNode()) {
|
|
108
171
|
void SocketFunction.mountPromise.finally(() => {
|
|
@@ -371,6 +434,22 @@ export class MachineServiceControllerBase {
|
|
|
371
434
|
});
|
|
372
435
|
}
|
|
373
436
|
|
|
437
|
+
/**
 * Lists summaries of the launches recorded during the last `sinceDays` days.
 *
 * Only archive keys are listed and parsed (via `parseLaunchKey`); the stored
 * launch records themselves are not read.
 *
 * @param sinceDays Size of the look-back window, in days.
 * @returns One summary per launch key whose time falls inside the window.
 */
public async getRecentLaunches(sinceDays: number): Promise<LaunchSummary[]> {
    const now = Date.now();
    const cutoff = now - sinceDays * timeInDay;

    // Launches are stored in per-UTC-day folders, and a window of `sinceDays`
    // days generally straddles `sinceDays + 1` calendar days, hence the
    // inclusive upper bound.
    const dayStrings: string[] = [];
    for (let i = 0; i <= sinceDays; i++) {
        dayStrings.push(formatLaunchDay(now - i * timeInDay));
    }

    const keyLists = await Promise.all(dayStrings.map(day => launches().find(`${day}/`)));

    const summaries: LaunchSummary[] = [];
    for (const keys of keyLists) {
        for (const key of keys) {
            const summary = parseLaunchKey(key);
            // The oldest day folder also holds launches from before the window
            // started; drop those so the result matches the requested window.
            if (summary.time >= cutoff) {
                summaries.push(summary);
            }
        }
    }
    return summaries;
}
|
|
452
|
+
|
|
374
453
|
public async deployFunctions(config: {
|
|
375
454
|
functionSpecs: FunctionSpec[];
|
|
376
455
|
prefixes: string[];
|
|
@@ -469,6 +548,7 @@ export const MachineServiceController = getSyncedController(
|
|
|
469
548
|
getPendingFunctions: {},
|
|
470
549
|
deployFunctions: {},
|
|
471
550
|
getLiveFunctions: {},
|
|
551
|
+
getRecentLaunches: {},
|
|
472
552
|
}),
|
|
473
553
|
() => ({
|
|
474
554
|
hooks: [assertIsManagementUser],
|
|
@@ -497,6 +577,7 @@ export const MachineServiceController = getSyncedController(
|
|
|
497
577
|
getGitInfo: ["gitInfo"],
|
|
498
578
|
getPendingFunctions: ["gitInfo"],
|
|
499
579
|
getLiveFunctions: ["gitInfo"],
|
|
580
|
+
getRecentLaunches: ["launches"],
|
|
500
581
|
}
|
|
501
582
|
}
|
|
502
583
|
);
|
|
@@ -142,7 +142,7 @@ class NodeConnectionsControllerBase {
|
|
|
142
142
|
}
|
|
143
143
|
|
|
144
144
|
/** Fetches the given node's metadata and returns just its `entryPoint`. */
public async getEntryPoint_forBrowser(nodeId: string) {
    const { entryPoint } = await NodeCapabilitiesController.nodes[nodeId].getMetadata();
    return entryPoint;
}
|
|
147
147
|
|
|
148
148
|
public async getAllNodeIds() {
|