querysub 0.451.0 → 0.453.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ import { delay } from "socket-function/src/batching";
12
12
  import debugbreak from "debugbreak";
13
13
  import { formatTime } from "socket-function/src/formatting/format";
14
14
  import type { DebugFunctionShardInfo } from "../3-path-functions/PathFunctionRunner";
15
+ import type { AuthoritySpec } from "../0-path-value-core/PathRouter";
15
16
  import { requiresNetworkTrustHook } from "../-d-trust/NetworkTrust2";
16
17
  import { isNoNetwork } from "../config";
17
18
  import { getDebuggerUrl } from "../diagnostics/listenOnDebugger";
@@ -94,10 +95,10 @@ export async function getControllerNodeIdList(
94
95
  await Promise.all(nodeIdsToTest.map(async nodeId => {
95
96
  let result = await doesNodeExposeController(nodeId, controller);
96
97
  if (result) {
97
- let entryPoint = await NodeCapabilitiesController.nodes[nodeId].getEntryPoint();
98
+ let metadata = await NodeCapabilitiesController.nodes[nodeId].getMetadata();
98
99
  passedNodeIds.set(nodeId, {
99
100
  machineId: getMachineId(nodeId),
100
- entryPoint,
101
+ entryPoint: metadata.entryPoint,
101
102
  });
102
103
  }
103
104
  }));
@@ -117,38 +118,43 @@ export async function getControllerNodeIdList(
117
118
 
118
119
 
119
120
  export async function doesNodeExposeController(reconnectNodeId: string, controller: SocketRegistered<{}>): Promise<boolean> {
120
- let exposedControllers = await timeoutToUndefinedSilent(10_000, NodeCapabilitiesController.nodes[reconnectNodeId].getExposedControllers());
121
+ let metadata = await timeoutToUndefinedSilent(10_000, NodeCapabilitiesController.nodes[reconnectNodeId].getMetadata());
121
122
 
122
- if (exposedControllers?.includes(controller._classGuid)) {
123
+ if (metadata?.exposedControllers.includes(controller._classGuid)) {
123
124
  return true;
124
125
  }
125
126
  return false;
126
127
  }
127
128
 
128
129
  const startupTime = Date.now();
130
+
131
+ export type NodeMetadata = {
132
+ entryPoint: string;
133
+ startupTime: number;
134
+ processId: number;
135
+ functionRunnerShards: DebugFunctionShardInfo[];
136
+ authoritySpec: AuthoritySpec;
137
+ exposedControllers: string[];
138
+ trueTimeOffset: number;
139
+ };
140
+
129
141
  class NodeCapabilitiesControllerBase {
130
- public async getExposedControllers() {
131
- return Array.from(SocketFunction.exposedClasses);
132
- }
133
- public async getEntryPoint() {
134
- return process.argv[1];
135
- }
136
- public async getStartupTime() {
137
- return startupTime;
142
+ public async getMetadata(): Promise<NodeMetadata> {
143
+ let { authorityLookup } = await import("../0-path-value-core/AuthorityLookup");
144
+ return {
145
+ entryPoint: process.argv[1],
146
+ startupTime,
147
+ processId: process.pid,
148
+ functionRunnerShards: getFunctionRunnerShards(),
149
+ authoritySpec: authorityLookup.getOurSpec(),
150
+ exposedControllers: Array.from(SocketFunction.exposedClasses),
151
+ trueTimeOffset: getTrueTimeOffset(),
152
+ };
138
153
  }
154
+
139
155
  public async getMemoryUsage() {
140
156
  return process.memoryUsage();
141
157
  }
142
- public async getProcessId() {
143
- return process.pid;
144
- }
145
- public async getFunctionRunnerShards() {
146
- return getFunctionRunnerShards();
147
- }
148
-
149
- public async getTrueTimeOffset() {
150
- return getTrueTimeOffset();
151
- }
152
158
 
153
159
  public async getInspectURL() {
154
160
  return await getDebuggerUrl();
@@ -188,13 +194,8 @@ export const NodeCapabilitiesController = SocketFunction.register(
188
194
  "NodeCapabilitiesController-399b7067-75c3-4d92-8be3-8470bde32d3c",
189
195
  new NodeCapabilitiesControllerBase(),
190
196
  () => ({
191
- getExposedControllers: {},
192
- getEntryPoint: {},
193
- getStartupTime: {},
197
+ getMetadata: {},
194
198
  getMemoryUsage: {},
195
- getProcessId: {},
196
- getFunctionRunnerShards: {},
197
- getTrueTimeOffset: {},
198
199
  getInspectURL: { hooks: [requiresNetworkTrustHook] },
199
200
  exposeExternalDebugPortOnce: { hooks: [requiresNetworkTrustHook] },
200
201
  }),
@@ -245,7 +245,31 @@ export function createArchiveLocker2(config: {
245
245
  newTransaction.ops.push({ type: "delete", key: obj.file });
246
246
  }
247
247
  }
248
+ // Per-op trace so a "written too slowly" crash on a specific file can be
249
+ // correlated against the exact transaction that touched it. The aggregate
250
+ // "Joining N => M" log doesn't name the files. We log before the attempt and
251
+ // again after with the resulting status (including "rejected").
252
+ const describeOp = (op: typeof newTransaction.ops[number]): string => {
253
+ let detail = "";
254
+ if (op.key.endsWith(".data") || op.key.endsWith(".data.locked")) {
255
+ try {
256
+ detail = ` source=${pathValueArchives.decodeDataPath(op.key).sourceType}`;
257
+ } catch (e) {
258
+ detail = ` decodeError=${(e as Error).message ?? e}`;
259
+ }
260
+ }
261
+ return `${op.type} ${op.key}${detail}`;
262
+ };
263
+ for (let op of newTransaction.ops) {
264
+ console.info(`Transaction attempt: ${describeOp(op)}`);
265
+ }
266
+
248
267
  let status = await locker.addTransaction(newTransaction);
268
+
269
+ for (let op of newTransaction.ops) {
270
+ console.info(`Transaction ${status}: ${describeOp(op)}`);
271
+ }
272
+
249
273
  if (status === "accepted") {
250
274
  let newFiles = new Set<string>();
251
275
  for (let file of files) {
@@ -68,7 +68,7 @@ export const MAX_CHANGE_AGE = MAX_ACCEPTED_CHANGE_AGE * 2;
68
68
  /** Extra time we keep clientside prediction rejections for, to give us time to receive the actual values. */
69
69
  export const CLIENTSIDE_PREDICT_LEEWAY = 500;
70
70
 
71
- /** Any PathValues which take longer than this to wrist should be rejected, so... we have
71
+ /** Any PathValues which take longer than this to write should be rejected, so... we have
72
72
  * to write well before this time.
73
73
  * - This has to be at least MAX_CHANGE_AGE * 4.5 + the time to serialize and
74
74
  * send our data to remote storage.
@@ -1472,8 +1472,8 @@ export class PathValueProxyWatcher {
1472
1472
  let notWatchingUnsyncedParent = reallyUnsyncedParentAccesses.filter(x => !remoteWatcher.debugIsWatchingPath(x));
1473
1473
  if (notWatchingUnsynced.length !== 0 || notWatchingUnsyncedParent.length !== 0) {
1474
1474
  console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("NOT REMOTE WATCHING REQUIRED PATHS")}. This means our sync or unsync (likely unsync) logic is broken, in remoteWatcher/clientWatcher. OR, there were no read nodes when we tried to sync (we don't handle missing read nodes correctly at the moment)`), { notWatchingUnsynced, notWatchingUnsyncedParent }, watcher.options.watchFunction);
1475
- debugbreak(2);
1476
- debugger;
1475
+ // debugbreak(2);
1476
+ // debugger;
1477
1477
  } else {
1478
1478
  console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("DID NOT RECEIVE PATH VALUES")}. This means PathValueServer is not responding to watches, either to specific paths, or for all paths`), { reallyUnsyncedAccesses, reallyUnsyncedParentAccesses }, watcher.options.watchFunction);
1479
1479
  // debugbreak(2);
@@ -1481,12 +1481,12 @@ export class PathValueProxyWatcher {
1481
1481
  }
1482
1482
  } else if (watcher.lastSpecialPromiseUnsynced) {
1483
1483
  console.warn((`${yellow("WATCHER SLOW TO SYNC")} ${watcher.debugName} ${magenta("DEPENDENT PROMISE NEVER RESOLVED")}. This promise might resolve, but it probably won't. Slow promises should be detached from the watcher system and use multiple watchers/writes, instead of blocking on a promise.`), watcher.lastSpecialPromiseUnsyncedReason, watcher.options.watchFunction);
1484
- debugbreak(2);
1485
- debugger;
1484
+ // debugbreak(2);
1485
+ // debugger;
1486
1486
  } else {
1487
1487
  console.error((`${red("WATCHER FAILED TO SYNC")} ${watcher.debugName} ${magenta("DID NOT TRIGGER WATCHER")}. This means either ProxyWatcher is broken (and isn't triggering when it should, or isn't watching when it should), or ClientWatcher/PathWatcher are broken and are not properly informing callers of watchers.`), { lastUnsyncedAccesses: watcher.lastUnsyncedAccesses, lastUnsyncedParentAccesses: watcher.lastUnsyncedParentAccesses }, watcher.options.watchFunction);
1488
- debugbreak(2);
1489
- debugger;
1488
+ // debugbreak(2);
1489
+ // debugger;
1490
1490
  }
1491
1491
  }, 60000);
1492
1492
  }
@@ -44,7 +44,7 @@ import { waitForFirstTimeSync } from "socket-function/time/trueTimeShim";
44
44
  import { logMeasureTable, measureBlock, measureFnc, measureWrap, startMeasure } from "socket-function/src/profiling/measure";
45
45
  import { delay } from "socket-function/src/batching";
46
46
  import { MaybePromise } from "socket-function/src/types";
47
- import { devDebugbreak, getDomain, isBootstrapOnly, isDynamicallyLoading, isPublic, noSyncing } from "../config";
47
+ import { devDebugbreak, getDomain, isBootstrapOnly, isDynamicallyLoading, isPublic, isRecovery, noSyncing } from "../config";
48
48
  import { Schema2, Schema2T, t } from "../2-proxy/schema2";
49
49
  import { CALL_PERMISSIONS_KEY } from "./permissionsShared";
50
50
  import yargs, { check } from "yargs";
@@ -1035,19 +1035,21 @@ export class Querysub {
1035
1035
  return module;
1036
1036
  }
1037
1037
 
1038
- RequireController.addMapGetModules(async (result, args) => {
1039
- let configObj = args[2] as { signedIdentity: SignedIdentity | undefined } | undefined;
1040
- if (!await isAllowedToSeeSource(configObj?.signedIdentity)) {
1041
- await isAllowedToSeeSource(configObj?.signedIdentity);
1042
- //console.log(red(`Not allowed to see source`));
1043
- for (let [key, value] of Object.entries(result.modules)) {
1044
- result.modules[key] = stripSource(value);
1038
+ if (!isRecovery()) {
1039
+ RequireController.addMapGetModules(async (result, args) => {
1040
+ let configObj = args[2] as { signedIdentity: SignedIdentity | undefined } | undefined;
1041
+ if (!await isAllowedToSeeSource(configObj?.signedIdentity)) {
1042
+ await isAllowedToSeeSource(configObj?.signedIdentity);
1043
+ //console.log(red(`Not allowed to see source`));
1044
+ for (let [key, value] of Object.entries(result.modules)) {
1045
+ result.modules[key] = stripSource(value);
1046
+ }
1047
+ } else {
1048
+ //console.log(green(`Allowed to see source`));
1045
1049
  }
1046
- } else {
1047
- //console.log(green(`Allowed to see source`));
1048
- }
1049
- return result;
1050
- });
1050
+ return result;
1051
+ });
1052
+ }
1051
1053
  }
1052
1054
 
1053
1055
  public static async hostService(name: string, port = 0) {
@@ -29,6 +29,7 @@ async function main() {
29
29
  await Querysub.hostService("gc");
30
30
 
31
31
  if (yargObj.watch) {
32
+ console.log("Running in watch mode.");
32
33
  await runInfinitePollCallAtStart(timeInDay, runAliveCheckerIteration);
33
34
  } else {
34
35
  try {
@@ -2,7 +2,7 @@ import "../inject";
2
2
 
3
3
  import { logErrors } from "../errors";
4
4
  import { PathValueArchives, pathValueArchives } from "../0-path-value-core/pathValueArchives";
5
- import { PathValue, VALUE_GC_THRESHOLD } from "../0-path-value-core/pathValueCore";
5
+ import { ARCHIVE_FLUSH_LIMIT, PathValue, VALUE_GC_THRESHOLD } from "../0-path-value-core/pathValueCore";
6
6
  import { runInfinitePollCallAtStart } from "socket-function/src/batching";
7
7
  import { measureBlock } from "socket-function/src/profiling/measure";
8
8
  import { pathValueSerializer } from "../-h-path-value-serialize/PathValueSerializer";
@@ -41,7 +41,12 @@ async function runGenesisJoinIteration(config?: { force?: boolean }) {
41
41
  valueFiles = valueFiles.filter(x => {
42
42
  let obj = pathValueArchives.decodeDataPath(x.file);
43
43
  if (!obj.minTime) return false;
44
- return obj.sourceType === "genesis";
44
+ if (obj.sourceType === "genesis") {
45
+ // Has to be old enough
46
+ return Date.now() - obj.time < ARCHIVE_FLUSH_LIMIT;
47
+ }
48
+ // Anything else can be merged immediately.
49
+ return true;
45
50
  });
46
51
  }
47
52
  let withinTimeRangeCount = valueFiles.length;
@@ -123,6 +128,7 @@ async function main() {
123
128
  await Querysub.hostService("join");
124
129
 
125
130
  if (yargObj.watch) {
131
+ console.log("Running in watch mode.");
126
132
  await runInfinitePollCallAtStart(VALUE_GC_THRESHOLD * 0.8, runGenesisJoinIteration);
127
133
  } else {
128
134
  try {
@@ -0,0 +1,65 @@
1
+ module.allowclient = true;
2
+
3
+ import { qreact } from "../4-dom/qreact";
4
+ import { css } from "typesafecss";
5
+ import { getBrowserUrlNode } from "../-f-node-discovery/NodeDiscovery";
6
+ import { MachineServiceController } from "./machineSchema";
7
+
8
+ const SINCE_DAYS = 2;
9
+ const CRASH_HUE = 0;
10
+ const OTHER_HUE = 210;
11
+
12
+ export class LaunchTrackingHeader extends qreact.Component {
13
+ render() {
14
+ let summaries = MachineServiceController(getBrowserUrlNode()).getRecentLaunches(SINCE_DAYS);
15
+ if (!summaries) return undefined;
16
+ if (summaries.length === 0) return undefined;
17
+
18
+ let totalCrashes = 0;
19
+ let totalOther = 0;
20
+ let perKey = new Map<string, { crashes: number; other: number }>();
21
+ for (let s of summaries) {
22
+ let isCrash = s.reason === "crashed";
23
+ if (isCrash) totalCrashes++;
24
+ else totalOther++;
25
+ let entry = perKey.get(s.serviceKey);
26
+ if (!entry) {
27
+ entry = { crashes: 0, other: 0 };
28
+ perKey.set(s.serviceKey, entry);
29
+ }
30
+ if (isCrash) entry.crashes++;
31
+ else entry.other++;
32
+ }
33
+
34
+ let ranked: { serviceKey: string; crashes: number; other: number }[] = [];
35
+ for (let [serviceKey, c] of perKey) {
36
+ ranked.push({ serviceKey, crashes: c.crashes, other: c.other });
37
+ }
38
+ ranked.sort((a, b) => {
39
+ if (b.crashes !== a.crashes) return b.crashes - a.crashes;
40
+ return (b.crashes + b.other) - (a.crashes + a.other);
41
+ });
42
+ let top = ranked[0];
43
+
44
+ let title = `Launches in last ${SINCE_DAYS} days: ${totalCrashes} crashed, ${totalOther} other`;
45
+ for (let r of ranked) {
46
+ title += `\n${r.serviceKey}: ${r.crashes} crashed, ${r.other} other`;
47
+ }
48
+
49
+ return <div title={title} className={css.hbox(6).colorhsl(0, 0, 20)}>
50
+ <span>🚀</span>
51
+ <span>
52
+ <span className={css.colorhsl(CRASH_HUE, 70, 35).boldStyle}>{totalCrashes}</span>
53
+ <span className={css.colorhsl(0, 0, 55)}>|</span>
54
+ <span className={css.colorhsl(OTHER_HUE, 65, 35).boldStyle}>{totalOther}</span>
55
+ </span>
56
+ {top && top.crashes > 0 &&
57
+ <span className={css.colorhsl(0, 0, 40)}>
58
+ (<span className={css.colorhsl(CRASH_HUE, 70, 35)}>{top.crashes}</span>
59
+ <span className={css.colorhsl(0, 0, 55)}>|</span>
60
+ <span className={css.colorhsl(OTHER_HUE, 65, 35)}>{top.other}</span>)
61
+ </span>
62
+ }
63
+ </div>;
64
+ }
65
+ }
@@ -4,7 +4,7 @@ import { measureWrap } from "socket-function/src/profiling/measure";
4
4
  import { getOwnMachineId } from "../-a-auth/certs";
5
5
  import { forceRemoveNode, getOurNodeId, getOurNodeIdAssert } from "../-f-node-discovery/NodeDiscovery";
6
6
  import { Querysub } from "../4-querysub/QuerysubController";
7
- import { MACHINE_RESYNC_INTERVAL, MachineServiceControllerBase, MachineInfo, ServiceConfig, serviceConfigs, SERVICE_FOLDER, machineInfos, SERVICE_NODE_FILE_NAME, getEffectiveServiceConfigs } from "./machineSchema";
7
+ import { MACHINE_RESYNC_INTERVAL, MachineServiceControllerBase, MachineInfo, ServiceConfig, serviceConfigs, SERVICE_FOLDER, machineInfos, SERVICE_NODE_FILE_NAME, getEffectiveServiceConfigs, recordLaunch } from "./machineSchema";
8
8
  import { runPromise } from "../functional/runCommand";
9
9
  import { getExternalIP } from "socket-function/src/networking";
10
10
  import { errorToUndefined, errorToUndefinedSilent } from "../errors";
@@ -183,25 +183,21 @@ export async function streamScreenOutput(config: {
183
183
  const pipeFile = `${root}${screenName}/pipe.txt`;
184
184
  const tailScript = `${root}${screenName}/smart_tail.sh`;
185
185
 
186
- // Create a smart tail script that handles file truncation
186
+ // Create a smart tail script that handles file truncation. It does
187
+ // NOT emit the initial backlog - that is read and sent directly by
188
+ // this process (as a single message) below. The script is given a
189
+ // start byte offset and only emits content appended after it.
187
190
  await fs.promises.writeFile(tailScript, `#!/bin/bash
188
191
  PIPE_FILE="$1"
192
+ START_POS="$2"
189
193
 
190
- # Initialize position tracking
191
- CURRENT_POS=0
194
+ # Position tracking starts at the offset we already read directly.
195
+ CURRENT_POS=$START_POS
196
+ CURRENT_SIZE=$START_POS
192
197
  LAST_MTIME=""
193
198
 
194
- # Read initial content and get file size
195
199
  if [ -f "$PIPE_FILE" ]; then
196
- CURRENT_SIZE=$(stat -c%s "$PIPE_FILE" 2>/dev/null || wc -c < "$PIPE_FILE")
197
200
  LAST_MTIME=$(stat -c%Y "$PIPE_FILE" 2>/dev/null || stat -f%m "$PIPE_FILE" 2>/dev/null || echo "0")
198
- # Output initial content like tail would
199
- if [ $CURRENT_SIZE -gt 0 ]; then
200
- cat "$PIPE_FILE"
201
- CURRENT_POS=$CURRENT_SIZE
202
- fi
203
- else
204
- CURRENT_SIZE=0
205
201
  fi
206
202
 
207
203
  # Poll for file changes every 250ms
@@ -237,8 +233,21 @@ done`);
237
233
 
238
234
  await runPromise(`chmod +x ${tailScript}`);
239
235
 
240
- // Use our smart tail script instead of regular tail
241
- childProcess = spawn("bash", [tailScript, pipeFile], {
236
+ // Read the existing backlog ourselves and deliver it as a single
237
+ // onData call. If we let the tail script cat it, the stream chunks
238
+ // it and runInSerial dribbles it out one round-trip at a time.
239
+ let initialContent = "";
240
+ let initialByteSize = 0;
241
+ try {
242
+ initialContent = await fs.promises.readFile(pipeFile, "utf8");
243
+ initialByteSize = Buffer.byteLength(initialContent, "utf8");
244
+ } catch {
245
+ // pipe.txt may not exist yet; the tail script will pick it up.
246
+ }
247
+
248
+ // The tail script emits only content appended after initialByteSize,
249
+ // so the backlog we just read is never sent twice.
250
+ childProcess = spawn("bash", [tailScript, pipeFile, String(initialByteSize)], {
242
251
  stdio: "pipe",
243
252
  });
244
253
 
@@ -262,6 +271,13 @@ done`);
262
271
  started.reject(err);
263
272
  });
264
273
 
274
+ if (initialContent) {
275
+ // Queued synchronously here, before any stdout 'data' event can
276
+ // fire, so the backlog is always delivered ahead of new output.
277
+ started.resolve();
278
+ void onDataWrapped(initialContent);
279
+ }
280
+
265
281
  await started.promise;
266
282
  } catch (e) {
267
283
  void stop();
@@ -725,6 +741,21 @@ const resyncServicesBase = runInSerial(measureWrap(async function resyncServices
725
741
 
726
742
  await fs.promises.writeFile(parameterPath, newParametersString);
727
743
 
744
+ let launchReason: "crashed" | "update";
745
+ if (!sameParameters) {
746
+ launchReason = "update";
747
+ } else {
748
+ launchReason = "crashed";
749
+ }
750
+ void recordLaunch({
751
+ serviceId: config.serviceId,
752
+ serviceKey: config.parameters.key,
753
+ screenName,
754
+ machineId,
755
+ reason: launchReason,
756
+ time: Date.now(),
757
+ });
758
+
728
759
  await runScreenCommand({
729
760
  screenName,
730
761
  command: config.parameters.command,
@@ -839,6 +870,92 @@ async function getPPID(pid: string) {
839
870
  }
840
871
  }
841
872
 
873
+ // Node prints this when the process has exited but the V8 inspector is still
874
+ // holding it open waiting for a debugger client to detach. If a debugger client
875
+ // drops uncleanly the process can sit on this forever, holding the screen and
876
+ // preventing the supervisor from spawning a fresh instance. The watcher below
877
+ // polls each screen's pipe.txt for this phrase and sends Ctrl+C to break it out.
878
+ const DEBUGGER_DISCONNECT_MESSAGE = "Waiting for the debugger to disconnect...";
879
+
880
+ // To avoid false positives from app code that happens to log a string
881
+ // containing the phrase, the wedge is only reported when:
882
+ // 1. the phrase is on its own line (start-of-string or after a newline), AND
883
+ // 2. nothing else has been logged after it (only trailing whitespace/EOF).
884
+ // In a real wedge, this line is the very last thing Node writes before going
885
+ // silent — so any later output rules out the wedge interpretation.
886
+ const DEBUGGER_DISCONNECT_TAIL_PATTERN = /(?:^|\r?\n)Waiting for the debugger to disconnect\.\.\.\s*$/;
887
+
888
+ // How often the wedge watcher scans every service's pipe.txt.
889
+ const DEBUGGER_WEDGE_POLL_INTERVAL = timeInSecond * 15;
890
+ // How much of the tail of pipe.txt to read on each scan. The phrase is short
891
+ // and appears near the very end of the log when a process is stuck, so a few
892
+ // KB is plenty and keeps scans cheap.
893
+ const DEBUGGER_WEDGE_TAIL_BYTES = 8 * 1024;
894
+ // Suppress repeated Ctrl+C bursts to the same screen while it tears down; if
895
+ // the first signal didn't break the wait, give it this long before we try again.
896
+ const DEBUGGER_WEDGE_RESIGNAL_DELAY = timeInMinute;
897
+ // After a first match, wait this long and re-check before signaling. If the
898
+ // process is still alive and just happened to log a message ending in the
899
+ // phrase, more output will appear in the meantime and the second check fails.
900
+ const DEBUGGER_WEDGE_RECHECK_DELAY = timeInSecond * 3;
901
+
902
+ // screenName -> last time we sent Ctrl+C because of a debugger wedge.
903
+ const lastDebuggerWedgeSignal = new Map<string, number>();
904
+
905
+ async function readPipeFileTail(pipeFile: string, maxBytes: number): Promise<string> {
906
+ let handle: fs.promises.FileHandle | undefined;
907
+ try {
908
+ handle = await fs.promises.open(pipeFile, "r");
909
+ let stat = await handle.stat();
910
+ let start = Math.max(0, stat.size - maxBytes);
911
+ let length = stat.size - start;
912
+ if (length <= 0) return "";
913
+ let buf = Buffer.alloc(length);
914
+ await handle.read(buf, 0, length, start);
915
+ return buf.toString("utf8");
916
+ } catch {
917
+ // File doesn't exist yet, or got truncated mid-read — treat as empty.
918
+ return "";
919
+ } finally {
920
+ if (handle) {
921
+ try {
922
+ await handle.close();
923
+ } catch {
924
+ // ignore
925
+ }
926
+ }
927
+ }
928
+ }
929
+
930
+ async function unwedgeStuckDebuggerScreens(): Promise<void> {
931
+ let prefix = getTmuxPrefix();
932
+ let screens = await getScreenState(false);
933
+ for (let { screenName } of screens) {
934
+ let pipeFile = os.homedir() + "/" + SERVICE_FOLDER + screenName + "/pipe.txt";
935
+ let tail1 = await readPipeFileTail(pipeFile, DEBUGGER_WEDGE_TAIL_BYTES);
936
+ if (!DEBUGGER_DISCONNECT_TAIL_PATTERN.test(tail1)) continue;
937
+
938
+ // Confirm the process is actually wedged and not just briefly idle by
939
+ // re-checking after a short delay. A live process will write more
940
+ // output in this window, which moves the phrase away from the end
941
+ // and fails the second match.
942
+ await delay(DEBUGGER_WEDGE_RECHECK_DELAY);
943
+ let tail2 = await readPipeFileTail(pipeFile, DEBUGGER_WEDGE_TAIL_BYTES);
944
+ if (!DEBUGGER_DISCONNECT_TAIL_PATTERN.test(tail2)) continue;
945
+
946
+ let last = lastDebuggerWedgeSignal.get(screenName) ?? 0;
947
+ if (Date.now() - last < DEBUGGER_WEDGE_RESIGNAL_DELAY) continue;
948
+ lastDebuggerWedgeSignal.set(screenName, Date.now());
949
+
950
+ console.warn(red(`Detected stuck "${DEBUGGER_DISCONNECT_MESSAGE}" in ${screenName} (confirmed across two checks ${DEBUGGER_WEDGE_RECHECK_DELAY}ms apart); sending Ctrl+C to unblock it.`));
951
+ try {
952
+ await runPromise(`${prefix}tmux send-keys -t ${screenName} 'C-c' Enter`);
953
+ } catch (e: any) {
954
+ console.warn(`Failed to send Ctrl+C to ${screenName}: ${e.stack ?? e}`);
955
+ }
956
+ }
957
+ }
958
+
842
959
  export async function machineApplyMain() {
843
960
  let parentPID = process.argv[2];
844
961
  // Wait for the console to get shimmed
@@ -867,6 +984,14 @@ export async function machineApplyMain() {
867
984
  await Querysub.hostService("machine-apply");
868
985
  onServiceConfigChange(resyncServices);
869
986
 
987
+ runInfinitePoll(DEBUGGER_WEDGE_POLL_INTERVAL, async () => {
988
+ try {
989
+ await unwedgeStuckDebuggerScreens();
990
+ } catch (e: any) {
991
+ console.error(`Error in debugger-wedge watcher: ${e.stack ?? e}`);
992
+ }
993
+ });
994
+
870
995
  runInfinitePoll(timeInMinute * 3, async () => {
871
996
  //console.log(magenta(`Quick outdated check at ${new Date().toISOString()}`));
872
997
  // console.log(magenta("Likely outdated, resyncing now"));
@@ -1,4 +1,4 @@
1
- import { isNodeTrue, list, timeInMinute, timeInSecond } from "socket-function/src/misc";
1
+ import { isNodeTrue, list, timeInDay, timeInMinute, timeInSecond } from "socket-function/src/misc";
2
2
  import { nestArchives } from "../-a-archives/archives";
3
3
  import { getArchivesBackblaze } from "../-a-archives/archivesBackBlaze";
4
4
  import { getDomain } from "../config";
@@ -103,6 +103,69 @@ export const machineInfos = archiveJSONT<MachineInfo>(() => nestArchives("machin
103
103
  export const serviceConfigs = archiveJSONT<ServiceConfig>(() => nestArchives("machines/service-configs/", getArchivesBackblaze(getDomain())));
104
104
  export const machineConfigs = archiveJSONT<MachineConfig>(() => nestArchives("machines/machine-configs/", getArchivesBackblaze(getDomain())));
105
105
 
106
+ export type LaunchRecord = {
107
+ serviceId: string;
108
+ serviceKey: string;
109
+ screenName: string;
110
+ machineId: string;
111
+ reason: "crashed" | "update";
112
+ time: number;
113
+ };
114
+
115
+ export type LaunchSummary = {
116
+ time: number;
117
+ reason: string;
118
+ serviceKey: string;
119
+ key: string;
120
+ };
121
+
122
+ const launches = lazy(() => nestArchives("machines/launches/", getArchivesBackblaze(getDomain())));
123
+
124
+ function formatLaunchDay(time: number): string {
125
+ let date = new Date(time);
126
+ let y = date.getUTCFullYear();
127
+ let m = String(date.getUTCMonth() + 1).padStart(2, "0");
128
+ let d = String(date.getUTCDate()).padStart(2, "0");
129
+ return `${y}-${m}-${d}`;
130
+ }
131
+
132
+ export async function recordLaunch(record: LaunchRecord) {
133
+ let day = formatLaunchDay(record.time);
134
+ let key = `${day}/${record.time}_${record.reason}_${record.serviceKey}`;
135
+ await launches().set(key, Buffer.from(JSON.stringify(record)));
136
+ }
137
+
138
+ function parseLaunchKey(key: string): LaunchSummary {
139
+ let rest = key;
140
+ let slash = key.indexOf("/");
141
+ if (slash >= 0) {
142
+ rest = key.slice(slash + 1);
143
+ }
144
+ let timeStr = "";
145
+ let afterTime = rest;
146
+ let firstUnderscore = rest.indexOf("_");
147
+ if (firstUnderscore >= 0) {
148
+ timeStr = rest.slice(0, firstUnderscore);
149
+ afterTime = rest.slice(firstUnderscore + 1);
150
+ }
151
+ let reason = afterTime;
152
+ let serviceKey = "";
153
+ let secondUnderscore = afterTime.indexOf("_");
154
+ if (secondUnderscore >= 0) {
155
+ reason = afterTime.slice(0, secondUnderscore);
156
+ serviceKey = afterTime.slice(secondUnderscore + 1);
157
+ }
158
+ let time = Number(timeStr);
159
+ if (!Number.isFinite(time)) {
160
+ console.warn(`Unparseable launch key (bad time): ${key}`);
161
+ time = Date.now();
162
+ }
163
+ if (reason !== "crashed" && reason !== "update") {
164
+ console.warn(`Launch key has unexpected reason "${reason}": ${key}`);
165
+ }
166
+ return { time, reason, serviceKey, key };
167
+ }
168
+
106
169
  export function doRegisterNodeForMachineCleanup() {
107
170
  if (isNode()) {
108
171
  void SocketFunction.mountPromise.finally(() => {
@@ -371,6 +434,22 @@ export class MachineServiceControllerBase {
371
434
  });
372
435
  }
373
436
 
437
+ public async getRecentLaunches(sinceDays: number): Promise<LaunchSummary[]> {
438
+ let now = Date.now();
439
+ let dayStrings: string[] = [];
440
+ for (let i = 0; i <= sinceDays; i++) {
441
+ dayStrings.push(formatLaunchDay(now - i * timeInDay));
442
+ }
443
+ let keyLists = await Promise.all(dayStrings.map(day => launches().find(`${day}/`)));
444
+ let summaries: LaunchSummary[] = [];
445
+ for (let keys of keyLists) {
446
+ for (let key of keys) {
447
+ summaries.push(parseLaunchKey(key));
448
+ }
449
+ }
450
+ return summaries;
451
+ }
452
+
374
453
  public async deployFunctions(config: {
375
454
  functionSpecs: FunctionSpec[];
376
455
  prefixes: string[];
@@ -469,6 +548,7 @@ export const MachineServiceController = getSyncedController(
469
548
  getPendingFunctions: {},
470
549
  deployFunctions: {},
471
550
  getLiveFunctions: {},
551
+ getRecentLaunches: {},
472
552
  }),
473
553
  () => ({
474
554
  hooks: [assertIsManagementUser],
@@ -497,6 +577,7 @@ export const MachineServiceController = getSyncedController(
497
577
  getGitInfo: ["gitInfo"],
498
578
  getPendingFunctions: ["gitInfo"],
499
579
  getLiveFunctions: ["gitInfo"],
580
+ getRecentLaunches: ["launches"],
500
581
  }
501
582
  }
502
583
  );
@@ -142,7 +142,7 @@ class NodeConnectionsControllerBase {
142
142
  }
143
143
 
144
144
  public async getEntryPoint_forBrowser(nodeId: string) {
145
- return await NodeCapabilitiesController.nodes[nodeId].getEntryPoint();
145
+ return (await NodeCapabilitiesController.nodes[nodeId].getMetadata()).entryPoint;
146
146
  }
147
147
 
148
148
  public async getAllNodeIds() {