querysub 0.450.0 → 0.452.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +6 -1
- package/bin/join-public.js +1 -0
- package/package.json +1 -1
- package/src/-a-archives/archiveCache.ts +53 -597
- package/src/-g-core-values/NodeCapabilities.ts +29 -28
- package/src/0-path-value-core/archiveLocks/ArchiveLocks2.ts +24 -0
- package/src/0-path-value-core/pathValueArchives.ts +28 -7
- package/src/0-path-value-core/pathValueCore.ts +1 -1
- package/src/2-proxy/PathValueProxyWatcher.ts +6 -6
- package/src/archiveapps/archiveGCEntry.tsx +1 -0
- package/src/archiveapps/archiveJoinEntry.ts +8 -2
- package/src/deployManager/LaunchTrackingHeader.tsx +69 -0
- package/src/deployManager/machineApplyMainCode.ts +46 -15
- package/src/deployManager/machineSchema.ts +82 -1
- package/src/diagnostics/NodeConnectionsPage.tsx +1 -1
- package/src/diagnostics/NodeViewer.tsx +15 -25
- package/src/diagnostics/debugger/mcp-server.ts +3 -3
- package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts +2 -2
- package/src/diagnostics/logs/IndexedLogs/MCPIndexedLogs.ts +64 -22
- package/src/diagnostics/logs/IndexedLogs/MCPIndexedLogsEntry.ts +32 -1
- package/src/diagnostics/managementPages.tsx +16 -0
- package/src/diagnostics/misc-pages/AuthoritySpecPage.tsx +112 -0
- package/src/diagnostics/pathAuditer.ts +0 -6
- package/test.ts +2 -1
- package/src/misc/getParentProcessId.cs +0 -53
- package/src/misc/getParentProcessId.ts +0 -53
|
@@ -12,6 +12,7 @@ import { delay } from "socket-function/src/batching";
|
|
|
12
12
|
import debugbreak from "debugbreak";
|
|
13
13
|
import { formatTime } from "socket-function/src/formatting/format";
|
|
14
14
|
import type { DebugFunctionShardInfo } from "../3-path-functions/PathFunctionRunner";
|
|
15
|
+
import type { AuthoritySpec } from "../0-path-value-core/PathRouter";
|
|
15
16
|
import { requiresNetworkTrustHook } from "../-d-trust/NetworkTrust2";
|
|
16
17
|
import { isNoNetwork } from "../config";
|
|
17
18
|
import { getDebuggerUrl } from "../diagnostics/listenOnDebugger";
|
|
@@ -94,10 +95,10 @@ export async function getControllerNodeIdList(
|
|
|
94
95
|
await Promise.all(nodeIdsToTest.map(async nodeId => {
|
|
95
96
|
let result = await doesNodeExposeController(nodeId, controller);
|
|
96
97
|
if (result) {
|
|
97
|
-
let
|
|
98
|
+
let metadata = await NodeCapabilitiesController.nodes[nodeId].getMetadata();
|
|
98
99
|
passedNodeIds.set(nodeId, {
|
|
99
100
|
machineId: getMachineId(nodeId),
|
|
100
|
-
entryPoint,
|
|
101
|
+
entryPoint: metadata.entryPoint,
|
|
101
102
|
});
|
|
102
103
|
}
|
|
103
104
|
}));
|
|
@@ -117,38 +118,43 @@ export async function getControllerNodeIdList(
|
|
|
117
118
|
|
|
118
119
|
|
|
119
120
|
export async function doesNodeExposeController(reconnectNodeId: string, controller: SocketRegistered<{}>): Promise<boolean> {
|
|
120
|
-
let
|
|
121
|
+
let metadata = await timeoutToUndefinedSilent(10_000, NodeCapabilitiesController.nodes[reconnectNodeId].getMetadata());
|
|
121
122
|
|
|
122
|
-
if (exposedControllers
|
|
123
|
+
if (metadata?.exposedControllers.includes(controller._classGuid)) {
|
|
123
124
|
return true;
|
|
124
125
|
}
|
|
125
126
|
return false;
|
|
126
127
|
}
|
|
127
128
|
|
|
128
129
|
const startupTime = Date.now();
|
|
130
|
+
|
|
131
|
+
export type NodeMetadata = {
|
|
132
|
+
entryPoint: string;
|
|
133
|
+
startupTime: number;
|
|
134
|
+
processId: number;
|
|
135
|
+
functionRunnerShards: DebugFunctionShardInfo[];
|
|
136
|
+
authoritySpec: AuthoritySpec;
|
|
137
|
+
exposedControllers: string[];
|
|
138
|
+
trueTimeOffset: number;
|
|
139
|
+
};
|
|
140
|
+
|
|
129
141
|
class NodeCapabilitiesControllerBase {
|
|
130
|
-
public async
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
142
|
+
public async getMetadata(): Promise<NodeMetadata> {
|
|
143
|
+
let { authorityLookup } = await import("../0-path-value-core/AuthorityLookup");
|
|
144
|
+
return {
|
|
145
|
+
entryPoint: process.argv[1],
|
|
146
|
+
startupTime,
|
|
147
|
+
processId: process.pid,
|
|
148
|
+
functionRunnerShards: getFunctionRunnerShards(),
|
|
149
|
+
authoritySpec: authorityLookup.getOurSpec(),
|
|
150
|
+
exposedControllers: Array.from(SocketFunction.exposedClasses),
|
|
151
|
+
trueTimeOffset: getTrueTimeOffset(),
|
|
152
|
+
};
|
|
138
153
|
}
|
|
154
|
+
|
|
139
155
|
public async getMemoryUsage() {
|
|
140
156
|
return process.memoryUsage();
|
|
141
157
|
}
|
|
142
|
-
public async getProcessId() {
|
|
143
|
-
return process.pid;
|
|
144
|
-
}
|
|
145
|
-
public async getFunctionRunnerShards() {
|
|
146
|
-
return getFunctionRunnerShards();
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
public async getTrueTimeOffset() {
|
|
150
|
-
return getTrueTimeOffset();
|
|
151
|
-
}
|
|
152
158
|
|
|
153
159
|
public async getInspectURL() {
|
|
154
160
|
return await getDebuggerUrl();
|
|
@@ -188,13 +194,8 @@ export const NodeCapabilitiesController = SocketFunction.register(
|
|
|
188
194
|
"NodeCapabilitiesController-399b7067-75c3-4d92-8be3-8470bde32d3c",
|
|
189
195
|
new NodeCapabilitiesControllerBase(),
|
|
190
196
|
() => ({
|
|
191
|
-
|
|
192
|
-
getEntryPoint: {},
|
|
193
|
-
getStartupTime: {},
|
|
197
|
+
getMetadata: {},
|
|
194
198
|
getMemoryUsage: {},
|
|
195
|
-
getProcessId: {},
|
|
196
|
-
getFunctionRunnerShards: {},
|
|
197
|
-
getTrueTimeOffset: {},
|
|
198
199
|
getInspectURL: { hooks: [requiresNetworkTrustHook] },
|
|
199
200
|
exposeExternalDebugPortOnce: { hooks: [requiresNetworkTrustHook] },
|
|
200
201
|
}),
|
|
@@ -245,7 +245,31 @@ export function createArchiveLocker2(config: {
|
|
|
245
245
|
newTransaction.ops.push({ type: "delete", key: obj.file });
|
|
246
246
|
}
|
|
247
247
|
}
|
|
248
|
+
// Per-op trace so a "written too slowly" crash on a specific file can be
|
|
249
|
+
// correlated against the exact transaction that touched it. The aggregate
|
|
250
|
+
// "Joining N => M" log doesn't name the files. We log before the attempt and
|
|
251
|
+
// again after with the resulting status (including "rejected").
|
|
252
|
+
const describeOp = (op: typeof newTransaction.ops[number]): string => {
|
|
253
|
+
let detail = "";
|
|
254
|
+
if (op.key.endsWith(".data") || op.key.endsWith(".data.locked")) {
|
|
255
|
+
try {
|
|
256
|
+
detail = ` source=${pathValueArchives.decodeDataPath(op.key).sourceType}`;
|
|
257
|
+
} catch (e) {
|
|
258
|
+
detail = ` decodeError=${(e as Error).message ?? e}`;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
return `${op.type} ${op.key}${detail}`;
|
|
262
|
+
};
|
|
263
|
+
for (let op of newTransaction.ops) {
|
|
264
|
+
console.info(`Transaction attempt: ${describeOp(op)}`);
|
|
265
|
+
}
|
|
266
|
+
|
|
248
267
|
let status = await locker.addTransaction(newTransaction);
|
|
268
|
+
|
|
269
|
+
for (let op of newTransaction.ops) {
|
|
270
|
+
console.info(`Transaction ${status}: ${describeOp(op)}`);
|
|
271
|
+
}
|
|
272
|
+
|
|
249
273
|
if (status === "accepted") {
|
|
250
274
|
let newFiles = new Set<string>();
|
|
251
275
|
for (let file of files) {
|
|
@@ -19,13 +19,15 @@ import { safeLoop } from "socket-function/src/batching";
|
|
|
19
19
|
import { errorToUndefined } from "../errors";
|
|
20
20
|
import { shutdown } from "../diagnostics/periodic";
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
// Kept separate from the cache-wrapped `archives` so we can probe the underlying archives
|
|
23
|
+
// directly (without the cache layer in the way) when diagnosing missing-file failures.
|
|
24
|
+
const archivesBase = lazy(() => getArchives("path-values/"));
|
|
25
|
+
export const archives = lazy(() => wrapArchivesWithCache(archivesBase()));
|
|
23
26
|
export const archivesLocks = lazy(() => getArchives("path-values-locks/"));
|
|
24
27
|
export const archivesRecycleBin = lazy(() => wrapArchivesWithCache(getArchives("path-values-recycle-bin/")));
|
|
25
28
|
|
|
26
|
-
//
|
|
27
|
-
//
|
|
28
|
-
// times to confirm it really is gone.
|
|
29
|
+
// If getInfo reports a just-written file as missing, recheck a few times before treating it
|
|
30
|
+
// as a fatal "written too slowly" condition.
|
|
29
31
|
const ARCHIVE_INFO_RECHECK_ATTEMPTS = 5;
|
|
30
32
|
const ARCHIVE_INFO_RECHECK_DELAY = 5000;
|
|
31
33
|
|
|
@@ -224,14 +226,33 @@ export class PathValueArchives {
|
|
|
224
226
|
let fileInfo = await archives().getInfo(fullPath);
|
|
225
227
|
// NOTE: If no fileInfo... then our file was merged? Which... is BAD, as it means we took
|
|
226
228
|
// too long to read it, so we probably took too long to write it too!
|
|
227
|
-
// Backblaze's getInfo can transiently report a just-written file as missing, so recheck
|
|
228
|
-
// a few times before treating a missing file as fatal.
|
|
229
229
|
for (let attempt = 0; !fileInfo && attempt < ARCHIVE_INFO_RECHECK_ATTEMPTS; attempt++) {
|
|
230
230
|
await delay(ARCHIVE_INFO_RECHECK_DELAY);
|
|
231
231
|
fileInfo = await archives().getInfo(fullPath);
|
|
232
232
|
}
|
|
233
233
|
if (!fileInfo || fileInfo.writeTime > slowestFileWriteTime) {
|
|
234
|
-
|
|
234
|
+
// If we're crashing because getInfo reports the file as missing, probe the
|
|
235
|
+
// underlying (non-cached) archives -- both getInfo and an actual get() of the
|
|
236
|
+
// bytes -- and record the results in the crash log, so we can tell whether that
|
|
237
|
+
// info is consistent. We do NOT use this as a fallback; we still crash.
|
|
238
|
+
let missingDiagnostic = "";
|
|
239
|
+
if (!fileInfo) {
|
|
240
|
+
let baseInfoStr: string;
|
|
241
|
+
try {
|
|
242
|
+
baseInfoStr = JSON.stringify(await archivesBase().getInfo(fullPath)) ?? "undefined";
|
|
243
|
+
} catch (e) {
|
|
244
|
+
baseInfoStr = `threw ${(e as Error).stack ?? e}`;
|
|
245
|
+
}
|
|
246
|
+
let baseGetStr: string;
|
|
247
|
+
try {
|
|
248
|
+
let baseData = await archivesBase().get(fullPath);
|
|
249
|
+
baseGetStr = baseData ? `${baseData.byteLength}B` : "undefined";
|
|
250
|
+
} catch (e) {
|
|
251
|
+
baseGetStr = `threw ${(e as Error).stack ?? e}`;
|
|
252
|
+
}
|
|
253
|
+
missingDiagnostic = ` [underlying archives probe: getInfo=${baseInfoStr}, get=${baseGetStr}, expected ${data.byteLength}B]`;
|
|
254
|
+
}
|
|
255
|
+
console.error(red(`File ${fullPath} was written too slowly, ${fileInfo?.writeTime || "undefined"} < ${slowestFileWriteTime}.${missingDiagnostic} This means some values will be rejected by reads. Killing server, our state is irrecoverable. Our watches have invalid data, and we have to stop before we create more invalid dependencies.`));
|
|
235
256
|
await delay(5000);
|
|
236
257
|
try {
|
|
237
258
|
await shutdown();
|
|
@@ -68,7 +68,7 @@ export const MAX_CHANGE_AGE = MAX_ACCEPTED_CHANGE_AGE * 2;
|
|
|
68
68
|
/** Extra time we keep clientside prediction rejections for, to give us time to receive the actual values. */
|
|
69
69
|
export const CLIENTSIDE_PREDICT_LEEWAY = 500;
|
|
70
70
|
|
|
71
|
-
/** Any PathValues which take longer than this to
|
|
71
|
+
/** Any PathValues which take longer than this to write should be rejected, so... we have
|
|
72
72
|
* to write well before this time.
|
|
73
73
|
* - This has to be at least MAX_CHANGE_AGE * 4.5 + the time to serialize and
|
|
74
74
|
* send our data to remote storage.
|
|
@@ -1472,8 +1472,8 @@ export class PathValueProxyWatcher {
|
|
|
1472
1472
|
let notWatchingUnsyncedParent = reallyUnsyncedParentAccesses.filter(x => !remoteWatcher.debugIsWatchingPath(x));
|
|
1473
1473
|
if (notWatchingUnsynced.length !== 0 || notWatchingUnsyncedParent.length !== 0) {
|
|
1474
1474
|
console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("NOT REMOTE WATCHING REQUIRED PATHS")}. This means our sync or unsync (likely unsync) logic is broken, in remoteWatcher/clientWatcher. OR, there were no read nodes when we tried to sync (we don't handle missing read nodes correctly at the moment)`), { notWatchingUnsynced, notWatchingUnsyncedParent }, watcher.options.watchFunction);
|
|
1475
|
-
debugbreak(2);
|
|
1476
|
-
debugger;
|
|
1475
|
+
// debugbreak(2);
|
|
1476
|
+
// debugger;
|
|
1477
1477
|
} else {
|
|
1478
1478
|
console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("DID NOT RECEIVE PATH VALUES")}. This means PathValueServer is not responding to watches, either to specific paths, or for all paths`), { reallyUnsyncedAccesses, reallyUnsyncedParentAccesses }, watcher.options.watchFunction);
|
|
1479
1479
|
// debugbreak(2);
|
|
@@ -1481,12 +1481,12 @@ export class PathValueProxyWatcher {
|
|
|
1481
1481
|
}
|
|
1482
1482
|
} else if (watcher.lastSpecialPromiseUnsynced) {
|
|
1483
1483
|
console.warn((`${yellow("WATCHER SLOW TO SYNC")} ${watcher.debugName} ${magenta("DEPENDENT PROMISE NEVER RESOLVED")}. This promise might resolve, but it probably won't. Slow promises should be detached from the watcher system and use multiple watchers/writes, instead of blocking on a promise.`), watcher.lastSpecialPromiseUnsyncedReason, watcher.options.watchFunction);
|
|
1484
|
-
debugbreak(2);
|
|
1485
|
-
debugger;
|
|
1484
|
+
// debugbreak(2);
|
|
1485
|
+
// debugger;
|
|
1486
1486
|
} else {
|
|
1487
1487
|
console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("DID NOT TRIGGER WATCHER")}. This means either ProxyWatcher is broken (and isn't triggering when it should, or isn't watching when it should), or ClientWatcher/PathWatcher are broken and are not properly informing callers of watchers.`), { lastUnsyncedAccesses: watcher.lastUnsyncedAccesses, lastUnsyncedParentAccesses: watcher.lastUnsyncedParentAccesses }, watcher.options.watchFunction);
|
|
1488
|
-
debugbreak(2);
|
|
1489
|
-
debugger;
|
|
1488
|
+
// debugbreak(2);
|
|
1489
|
+
// debugger;
|
|
1490
1490
|
}
|
|
1491
1491
|
}, 60000);
|
|
1492
1492
|
}
|
|
@@ -2,7 +2,7 @@ import "../inject";
|
|
|
2
2
|
|
|
3
3
|
import { logErrors } from "../errors";
|
|
4
4
|
import { PathValueArchives, pathValueArchives } from "../0-path-value-core/pathValueArchives";
|
|
5
|
-
import { PathValue, VALUE_GC_THRESHOLD } from "../0-path-value-core/pathValueCore";
|
|
5
|
+
import { ARCHIVE_FLUSH_LIMIT, PathValue, VALUE_GC_THRESHOLD } from "../0-path-value-core/pathValueCore";
|
|
6
6
|
import { runInfinitePollCallAtStart } from "socket-function/src/batching";
|
|
7
7
|
import { measureBlock } from "socket-function/src/profiling/measure";
|
|
8
8
|
import { pathValueSerializer } from "../-h-path-value-serialize/PathValueSerializer";
|
|
@@ -41,7 +41,12 @@ async function runGenesisJoinIteration(config?: { force?: boolean }) {
|
|
|
41
41
|
valueFiles = valueFiles.filter(x => {
|
|
42
42
|
let obj = pathValueArchives.decodeDataPath(x.file);
|
|
43
43
|
if (!obj.minTime) return false;
|
|
44
|
-
|
|
44
|
+
if (obj.sourceType === "genesis") {
|
|
45
|
+
// Has to be old enough
|
|
46
|
+
return Date.now() - obj.time < ARCHIVE_FLUSH_LIMIT;
|
|
47
|
+
}
|
|
48
|
+
// Anything else can be merged immediately.
|
|
49
|
+
return true;
|
|
45
50
|
});
|
|
46
51
|
}
|
|
47
52
|
let withinTimeRangeCount = valueFiles.length;
|
|
@@ -123,6 +128,7 @@ async function main() {
|
|
|
123
128
|
await Querysub.hostService("join");
|
|
124
129
|
|
|
125
130
|
if (yargObj.watch) {
|
|
131
|
+
console.log("Running in watch mode.");
|
|
126
132
|
await runInfinitePollCallAtStart(VALUE_GC_THRESHOLD * 0.8, runGenesisJoinIteration);
|
|
127
133
|
} else {
|
|
128
134
|
try {
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
module.allowclient = true;
|
|
2
|
+
|
|
3
|
+
import { qreact } from "../4-dom/qreact";
|
|
4
|
+
import { css } from "typesafecss";
|
|
5
|
+
import { getBrowserUrlNode } from "../-f-node-discovery/NodeDiscovery";
|
|
6
|
+
import { MachineServiceController } from "./machineSchema";
|
|
7
|
+
|
|
8
|
+
const SINCE_DAYS = 7;
|
|
9
|
+
const CRASH_HUE = 0;
|
|
10
|
+
const OTHER_HUE = 210;
|
|
11
|
+
|
|
12
|
+
export class LaunchTrackingHeader extends qreact.Component {
|
|
13
|
+
render() {
|
|
14
|
+
let summaries = MachineServiceController(getBrowserUrlNode()).getRecentLaunches(SINCE_DAYS);
|
|
15
|
+
if (!summaries) return undefined;
|
|
16
|
+
if (summaries.length === 0) return undefined;
|
|
17
|
+
|
|
18
|
+
let totalCrashes = 0;
|
|
19
|
+
let totalOther = 0;
|
|
20
|
+
let perKey = new Map<string, { crashes: number; other: number }>();
|
|
21
|
+
for (let s of summaries) {
|
|
22
|
+
let isCrash = s.reason === "crashed";
|
|
23
|
+
if (isCrash) totalCrashes++;
|
|
24
|
+
else totalOther++;
|
|
25
|
+
let entry = perKey.get(s.serviceKey);
|
|
26
|
+
if (!entry) {
|
|
27
|
+
entry = { crashes: 0, other: 0 };
|
|
28
|
+
perKey.set(s.serviceKey, entry);
|
|
29
|
+
}
|
|
30
|
+
if (isCrash) entry.crashes++;
|
|
31
|
+
else entry.other++;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
let top: { serviceKey: string; crashes: number; other: number } | undefined;
|
|
35
|
+
for (let [serviceKey, c] of perKey) {
|
|
36
|
+
let candidate = { serviceKey, crashes: c.crashes, other: c.other };
|
|
37
|
+
if (!top) {
|
|
38
|
+
top = candidate;
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
if (candidate.crashes > top.crashes) {
|
|
42
|
+
top = candidate;
|
|
43
|
+
} else if (candidate.crashes === top.crashes && (candidate.crashes + candidate.other) > (top.crashes + top.other)) {
|
|
44
|
+
top = candidate;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
let title = `Launches in last ${SINCE_DAYS} days: ${totalCrashes} crashed, ${totalOther} other`;
|
|
49
|
+
if (top && top.crashes > 0) {
|
|
50
|
+
title += ` -- top by crashes: ${top.serviceKey} (${top.crashes} crashed, ${top.other} other)`;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
return <div title={title} className={css.hbox(6).colorhsl(0, 0, 20)}>
|
|
54
|
+
<span>🚀</span>
|
|
55
|
+
<span>
|
|
56
|
+
<span className={css.colorhsl(CRASH_HUE, 70, 35).boldStyle}>{totalCrashes}</span>
|
|
57
|
+
<span className={css.colorhsl(0, 0, 55)}>|</span>
|
|
58
|
+
<span className={css.colorhsl(OTHER_HUE, 65, 35).boldStyle}>{totalOther}</span>
|
|
59
|
+
</span>
|
|
60
|
+
{top && top.crashes > 0 &&
|
|
61
|
+
<span className={css.colorhsl(0, 0, 40)}>
|
|
62
|
+
(<span className={css.colorhsl(CRASH_HUE, 70, 35)}>{top.crashes}</span>
|
|
63
|
+
<span className={css.colorhsl(0, 0, 55)}>|</span>
|
|
64
|
+
<span className={css.colorhsl(OTHER_HUE, 65, 35)}>{top.other}</span>)
|
|
65
|
+
</span>
|
|
66
|
+
}
|
|
67
|
+
</div>;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
@@ -4,7 +4,7 @@ import { measureWrap } from "socket-function/src/profiling/measure";
|
|
|
4
4
|
import { getOwnMachineId } from "../-a-auth/certs";
|
|
5
5
|
import { forceRemoveNode, getOurNodeId, getOurNodeIdAssert } from "../-f-node-discovery/NodeDiscovery";
|
|
6
6
|
import { Querysub } from "../4-querysub/QuerysubController";
|
|
7
|
-
import { MACHINE_RESYNC_INTERVAL, MachineServiceControllerBase, MachineInfo, ServiceConfig, serviceConfigs, SERVICE_FOLDER, machineInfos, SERVICE_NODE_FILE_NAME, getEffectiveServiceConfigs } from "./machineSchema";
|
|
7
|
+
import { MACHINE_RESYNC_INTERVAL, MachineServiceControllerBase, MachineInfo, ServiceConfig, serviceConfigs, SERVICE_FOLDER, machineInfos, SERVICE_NODE_FILE_NAME, getEffectiveServiceConfigs, recordLaunch } from "./machineSchema";
|
|
8
8
|
import { runPromise } from "../functional/runCommand";
|
|
9
9
|
import { getExternalIP } from "socket-function/src/networking";
|
|
10
10
|
import { errorToUndefined, errorToUndefinedSilent } from "../errors";
|
|
@@ -183,25 +183,21 @@ export async function streamScreenOutput(config: {
|
|
|
183
183
|
const pipeFile = `${root}${screenName}/pipe.txt`;
|
|
184
184
|
const tailScript = `${root}${screenName}/smart_tail.sh`;
|
|
185
185
|
|
|
186
|
-
// Create a smart tail script that handles file truncation
|
|
186
|
+
// Create a smart tail script that handles file truncation. It does
|
|
187
|
+
// NOT emit the initial backlog - that is read and sent directly by
|
|
188
|
+
// this process (as a single message) below. The script is given a
|
|
189
|
+
// start byte offset and only emits content appended after it.
|
|
187
190
|
await fs.promises.writeFile(tailScript, `#!/bin/bash
|
|
188
191
|
PIPE_FILE="$1"
|
|
192
|
+
START_POS="$2"
|
|
189
193
|
|
|
190
|
-
#
|
|
191
|
-
CURRENT_POS
|
|
194
|
+
# Position tracking starts at the offset we already read directly.
|
|
195
|
+
CURRENT_POS=$START_POS
|
|
196
|
+
CURRENT_SIZE=$START_POS
|
|
192
197
|
LAST_MTIME=""
|
|
193
198
|
|
|
194
|
-
# Read initial content and get file size
|
|
195
199
|
if [ -f "$PIPE_FILE" ]; then
|
|
196
|
-
CURRENT_SIZE=$(stat -c%s "$PIPE_FILE" 2>/dev/null || wc -c < "$PIPE_FILE")
|
|
197
200
|
LAST_MTIME=$(stat -c%Y "$PIPE_FILE" 2>/dev/null || stat -f%m "$PIPE_FILE" 2>/dev/null || echo "0")
|
|
198
|
-
# Output initial content like tail would
|
|
199
|
-
if [ $CURRENT_SIZE -gt 0 ]; then
|
|
200
|
-
cat "$PIPE_FILE"
|
|
201
|
-
CURRENT_POS=$CURRENT_SIZE
|
|
202
|
-
fi
|
|
203
|
-
else
|
|
204
|
-
CURRENT_SIZE=0
|
|
205
201
|
fi
|
|
206
202
|
|
|
207
203
|
# Poll for file changes every 250ms
|
|
@@ -237,8 +233,21 @@ done`);
|
|
|
237
233
|
|
|
238
234
|
await runPromise(`chmod +x ${tailScript}`);
|
|
239
235
|
|
|
240
|
-
//
|
|
241
|
-
|
|
236
|
+
// Read the existing backlog ourselves and deliver it as a single
|
|
237
|
+
// onData call. If we let the tail script cat it, the stream chunks
|
|
238
|
+
// it and runInSerial dribbles it out one round-trip at a time.
|
|
239
|
+
let initialContent = "";
|
|
240
|
+
let initialByteSize = 0;
|
|
241
|
+
try {
|
|
242
|
+
initialContent = await fs.promises.readFile(pipeFile, "utf8");
|
|
243
|
+
initialByteSize = Buffer.byteLength(initialContent, "utf8");
|
|
244
|
+
} catch {
|
|
245
|
+
// pipe.txt may not exist yet; the tail script will pick it up.
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// The tail script emits only content appended after initialByteSize,
|
|
249
|
+
// so the backlog we just read is never sent twice.
|
|
250
|
+
childProcess = spawn("bash", [tailScript, pipeFile, String(initialByteSize)], {
|
|
242
251
|
stdio: "pipe",
|
|
243
252
|
});
|
|
244
253
|
|
|
@@ -262,6 +271,13 @@ done`);
|
|
|
262
271
|
started.reject(err);
|
|
263
272
|
});
|
|
264
273
|
|
|
274
|
+
if (initialContent) {
|
|
275
|
+
// Queued synchronously here, before any stdout 'data' event can
|
|
276
|
+
// fire, so the backlog is always delivered ahead of new output.
|
|
277
|
+
started.resolve();
|
|
278
|
+
void onDataWrapped(initialContent);
|
|
279
|
+
}
|
|
280
|
+
|
|
265
281
|
await started.promise;
|
|
266
282
|
} catch (e) {
|
|
267
283
|
void stop();
|
|
@@ -725,6 +741,21 @@ const resyncServicesBase = runInSerial(measureWrap(async function resyncServices
|
|
|
725
741
|
|
|
726
742
|
await fs.promises.writeFile(parameterPath, newParametersString);
|
|
727
743
|
|
|
744
|
+
let launchReason: "crashed" | "update";
|
|
745
|
+
if (!sameParameters) {
|
|
746
|
+
launchReason = "update";
|
|
747
|
+
} else {
|
|
748
|
+
launchReason = "crashed";
|
|
749
|
+
}
|
|
750
|
+
void recordLaunch({
|
|
751
|
+
serviceId: config.serviceId,
|
|
752
|
+
serviceKey: config.parameters.key,
|
|
753
|
+
screenName,
|
|
754
|
+
machineId,
|
|
755
|
+
reason: launchReason,
|
|
756
|
+
time: Date.now(),
|
|
757
|
+
});
|
|
758
|
+
|
|
728
759
|
await runScreenCommand({
|
|
729
760
|
screenName,
|
|
730
761
|
command: config.parameters.command,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isNodeTrue, list, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
1
|
+
import { isNodeTrue, list, timeInDay, timeInMinute, timeInSecond } from "socket-function/src/misc";
|
|
2
2
|
import { nestArchives } from "../-a-archives/archives";
|
|
3
3
|
import { getArchivesBackblaze } from "../-a-archives/archivesBackBlaze";
|
|
4
4
|
import { getDomain } from "../config";
|
|
@@ -103,6 +103,69 @@ export const machineInfos = archiveJSONT<MachineInfo>(() => nestArchives("machin
|
|
|
103
103
|
export const serviceConfigs = archiveJSONT<ServiceConfig>(() => nestArchives("machines/service-configs/", getArchivesBackblaze(getDomain())));
|
|
104
104
|
export const machineConfigs = archiveJSONT<MachineConfig>(() => nestArchives("machines/machine-configs/", getArchivesBackblaze(getDomain())));
|
|
105
105
|
|
|
106
|
+
export type LaunchRecord = {
|
|
107
|
+
serviceId: string;
|
|
108
|
+
serviceKey: string;
|
|
109
|
+
screenName: string;
|
|
110
|
+
machineId: string;
|
|
111
|
+
reason: "crashed" | "update";
|
|
112
|
+
time: number;
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
export type LaunchSummary = {
|
|
116
|
+
time: number;
|
|
117
|
+
reason: string;
|
|
118
|
+
serviceKey: string;
|
|
119
|
+
key: string;
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
const launches = lazy(() => nestArchives("machines/launches/", getArchivesBackblaze(getDomain())));
|
|
123
|
+
|
|
124
|
+
function formatLaunchDay(time: number): string {
|
|
125
|
+
let date = new Date(time);
|
|
126
|
+
let y = date.getUTCFullYear();
|
|
127
|
+
let m = String(date.getUTCMonth() + 1).padStart(2, "0");
|
|
128
|
+
let d = String(date.getUTCDate()).padStart(2, "0");
|
|
129
|
+
return `${y}-${m}-${d}`;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
export async function recordLaunch(record: LaunchRecord) {
|
|
133
|
+
let day = formatLaunchDay(record.time);
|
|
134
|
+
let key = `${day}/${record.time}_${record.reason}_${record.serviceKey}`;
|
|
135
|
+
await launches().set(key, Buffer.from(JSON.stringify(record)));
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
function parseLaunchKey(key: string): LaunchSummary {
|
|
139
|
+
let rest = key;
|
|
140
|
+
let slash = key.indexOf("/");
|
|
141
|
+
if (slash >= 0) {
|
|
142
|
+
rest = key.slice(slash + 1);
|
|
143
|
+
}
|
|
144
|
+
let timeStr = "";
|
|
145
|
+
let afterTime = rest;
|
|
146
|
+
let firstUnderscore = rest.indexOf("_");
|
|
147
|
+
if (firstUnderscore >= 0) {
|
|
148
|
+
timeStr = rest.slice(0, firstUnderscore);
|
|
149
|
+
afterTime = rest.slice(firstUnderscore + 1);
|
|
150
|
+
}
|
|
151
|
+
let reason = afterTime;
|
|
152
|
+
let serviceKey = "";
|
|
153
|
+
let secondUnderscore = afterTime.indexOf("_");
|
|
154
|
+
if (secondUnderscore >= 0) {
|
|
155
|
+
reason = afterTime.slice(0, secondUnderscore);
|
|
156
|
+
serviceKey = afterTime.slice(secondUnderscore + 1);
|
|
157
|
+
}
|
|
158
|
+
let time = Number(timeStr);
|
|
159
|
+
if (!Number.isFinite(time)) {
|
|
160
|
+
console.warn(`Unparseable launch key (bad time): ${key}`);
|
|
161
|
+
time = Date.now();
|
|
162
|
+
}
|
|
163
|
+
if (reason !== "crashed" && reason !== "update") {
|
|
164
|
+
console.warn(`Launch key has unexpected reason "${reason}": ${key}`);
|
|
165
|
+
}
|
|
166
|
+
return { time, reason, serviceKey, key };
|
|
167
|
+
}
|
|
168
|
+
|
|
106
169
|
export function doRegisterNodeForMachineCleanup() {
|
|
107
170
|
if (isNode()) {
|
|
108
171
|
void SocketFunction.mountPromise.finally(() => {
|
|
@@ -371,6 +434,22 @@ export class MachineServiceControllerBase {
|
|
|
371
434
|
});
|
|
372
435
|
}
|
|
373
436
|
|
|
437
|
+
public async getRecentLaunches(sinceDays: number): Promise<LaunchSummary[]> {
|
|
438
|
+
let now = Date.now();
|
|
439
|
+
let dayStrings: string[] = [];
|
|
440
|
+
for (let i = 0; i <= sinceDays; i++) {
|
|
441
|
+
dayStrings.push(formatLaunchDay(now - i * timeInDay));
|
|
442
|
+
}
|
|
443
|
+
let keyLists = await Promise.all(dayStrings.map(day => launches().find(`${day}/`)));
|
|
444
|
+
let summaries: LaunchSummary[] = [];
|
|
445
|
+
for (let keys of keyLists) {
|
|
446
|
+
for (let key of keys) {
|
|
447
|
+
summaries.push(parseLaunchKey(key));
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
return summaries;
|
|
451
|
+
}
|
|
452
|
+
|
|
374
453
|
public async deployFunctions(config: {
|
|
375
454
|
functionSpecs: FunctionSpec[];
|
|
376
455
|
prefixes: string[];
|
|
@@ -469,6 +548,7 @@ export const MachineServiceController = getSyncedController(
|
|
|
469
548
|
getPendingFunctions: {},
|
|
470
549
|
deployFunctions: {},
|
|
471
550
|
getLiveFunctions: {},
|
|
551
|
+
getRecentLaunches: {},
|
|
472
552
|
}),
|
|
473
553
|
() => ({
|
|
474
554
|
hooks: [assertIsManagementUser],
|
|
@@ -497,6 +577,7 @@ export const MachineServiceController = getSyncedController(
|
|
|
497
577
|
getGitInfo: ["gitInfo"],
|
|
498
578
|
getPendingFunctions: ["gitInfo"],
|
|
499
579
|
getLiveFunctions: ["gitInfo"],
|
|
580
|
+
getRecentLaunches: ["launches"],
|
|
500
581
|
}
|
|
501
582
|
}
|
|
502
583
|
);
|
|
@@ -142,7 +142,7 @@ class NodeConnectionsControllerBase {
|
|
|
142
142
|
}
|
|
143
143
|
|
|
144
144
|
public async getEntryPoint_forBrowser(nodeId: string) {
|
|
145
|
-
return await NodeCapabilitiesController.nodes[nodeId].
|
|
145
|
+
return (await NodeCapabilitiesController.nodes[nodeId].getMetadata()).entryPoint;
|
|
146
146
|
}
|
|
147
147
|
|
|
148
148
|
public async getAllNodeIds() {
|