@love-moon/conductor-cli 0.2.17 → 0.2.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/conductor-config.js +28 -29
- package/bin/conductor-fire.js +210 -18
- package/bin/conductor-send-file.js +290 -0
- package/bin/conductor.js +5 -1
- package/package.json +6 -5
- package/src/daemon.js +975 -29
- package/src/runtime-backends.js +31 -0
package/src/daemon.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import os from "node:os";
|
|
4
|
+
import { createRequire } from "node:module";
|
|
4
5
|
import { spawn } from "node:child_process";
|
|
5
6
|
import { fileURLToPath } from "node:url";
|
|
6
7
|
|
|
@@ -9,11 +10,13 @@ import yaml from "js-yaml";
|
|
|
9
10
|
|
|
10
11
|
import { ConductorWebSocketClient, ConductorConfig, loadConfig, ConfigFileNotFound } from "@love-moon/conductor-sdk";
|
|
11
12
|
import { DaemonLogCollector } from "./log-collector.js";
|
|
13
|
+
import { filterRuntimeSupportedAllowCliList, normalizeRuntimeBackendName } from "./runtime-backends.js";
|
|
12
14
|
|
|
13
15
|
dotenv.config();
|
|
14
16
|
|
|
15
17
|
const __filename = fileURLToPath(import.meta.url);
|
|
16
18
|
const __dirname = path.dirname(__filename);
|
|
19
|
+
const moduleRequire = createRequire(import.meta.url);
|
|
17
20
|
const CLI_PATH = path.resolve(__dirname, "..", "bin", "conductor-fire.js");
|
|
18
21
|
const DAEMON_LOG_DIR = path.join(os.homedir(), ".conductor", "logs");
|
|
19
22
|
const DAEMON_LOG_PATH = path.join(DAEMON_LOG_DIR, "conductor-daemon.log");
|
|
@@ -22,6 +25,10 @@ const PLAN_LIMIT_MESSAGES = {
|
|
|
22
25
|
app_active_task: "Free plan limit reached: only 1 active app task is allowed.",
|
|
23
26
|
daemon_active_connection: "Free plan limit reached: only 1 active daemon connection is allowed.",
|
|
24
27
|
};
|
|
28
|
+
const DEFAULT_TERMINAL_COLS = 120;
|
|
29
|
+
const DEFAULT_TERMINAL_ROWS = 40;
|
|
30
|
+
const DEFAULT_TERMINAL_RING_BUFFER_MAX_BYTES = 2 * 1024 * 1024;
|
|
31
|
+
let nodePtySpawnPromise = null;
|
|
25
32
|
|
|
26
33
|
function appendDaemonLog(line) {
|
|
27
34
|
try {
|
|
@@ -112,16 +119,118 @@ function getPlanLimitMessage(payload) {
|
|
|
112
119
|
const DEFAULT_CLI_LIST = {
|
|
113
120
|
codex: "codex --dangerously-bypass-approvals-and-sandbox",
|
|
114
121
|
claude: "claude --dangerously-skip-permissions",
|
|
122
|
+
opencode: "opencode",
|
|
115
123
|
};
|
|
116
124
|
|
|
117
125
|
/**
 * Pick the CLI allow-list the daemon should use.
 *
 * @param {object} userConfig - Parsed user configuration.
 * @returns {object} The user's `allow_cli_list` passed through
 *   `filterRuntimeSupportedAllowCliList` when it is a truthy object,
 *   otherwise `DEFAULT_CLI_LIST`.
 */
function getAllowCliList(userConfig) {
  const configured = userConfig.allow_cli_list;
  const hasConfiguredList = Boolean(configured) && typeof configured === "object";
  // A user-supplied list wins over the defaults.
  return hasConfiguredList
    ? filterRuntimeSupportedAllowCliList(configured)
    : DEFAULT_CLI_LIST;
}
|
|
124
132
|
|
|
133
|
+
/**
 * Spawn a PTY through `node-pty`, loading the module lazily on first use.
 *
 * The resolved `spawn` function is memoized in `nodePtySpawnPromise` so the
 * dynamic import happens at most once per successful load.
 *
 * @param {string} command - Executable to launch inside the PTY.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} options - Options forwarded unchanged to node-pty's spawn.
 * @returns {Promise<object>} Whatever node-pty's `spawn` returns.
 * @throws {Error} When node-pty cannot be imported or exposes no `spawn`.
 */
async function defaultCreatePty(command, args, options) {
  if (!nodePtySpawnPromise) {
    const spawnHelperInfo = ensureNodePtySpawnHelperExecutable();
    if (spawnHelperInfo?.updated) {
      log(`Enabled execute permission on node-pty spawn-helper: ${spawnHelperInfo.helperPath}`);
    }
    const pendingLoad = import("node-pty").then((mod) => {
      if (typeof mod.spawn === "function") {
        return mod.spawn;
      }
      if (mod.default && typeof mod.default.spawn === "function") {
        return mod.default.spawn.bind(mod.default);
      }
      throw new Error("node-pty spawn export not found");
    });
    nodePtySpawnPromise = pendingLoad;
    // Do not memoize a failed load: drop the cached promise so a later PTY
    // request retries the import instead of replaying the same rejection.
    pendingLoad.catch(() => {
      if (nodePtySpawnPromise === pendingLoad) {
        nodePtySpawnPromise = null;
      }
    });
  }
  const spawnPty = await nodePtySpawnPromise;
  return spawnPty(command, args, options);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Make sure node-pty's `spawn-helper` binary carries an execute bit.
 *
 * @param {object} [deps] - Optional overrides: `platform`, `arch`,
 *   `existsSync`, `statSync`, `chmodSync`, `packageJsonPath`.
 * @returns {{helperPath: string, updated: boolean} | null} `null` on win32,
 *   when node-pty's package.json cannot be resolved, or when no helper file
 *   exists; otherwise the helper path and whether its mode was changed.
 */
export function ensureNodePtySpawnHelperExecutable(deps = {}) {
  const targetPlatform = deps.platform || process.platform;
  if (targetPlatform === "win32") {
    return null;
  }

  const targetArch = deps.arch || process.arch;
  const pathExists = deps.existsSync || fs.existsSync;
  const readStat = deps.statSync || fs.statSync;
  const applyMode = deps.chmodSync || fs.chmodSync;

  let manifestPath = deps.packageJsonPath || null;
  if (!manifestPath) {
    try {
      manifestPath = moduleRequire.resolve("node-pty/package.json");
    } catch {
      return null;
    }
  }

  // Probe the known helper locations in priority order.
  const packageRoot = path.dirname(manifestPath);
  const candidateDirs = [
    ["build", "Release"],
    ["build", "Debug"],
    ["prebuilds", `${targetPlatform}-${targetArch}`],
  ];
  let helperPath;
  for (const segments of candidateDirs) {
    const candidate = path.join(packageRoot, ...segments, "spawn-helper");
    if (pathExists(candidate)) {
      helperPath = candidate;
      break;
    }
  }
  if (!helperPath) {
    return null;
  }

  const permissionBits = readStat(helperPath).mode & 0o777;
  const alreadyExecutable = (permissionBits & 0o111) !== 0;
  if (alreadyExecutable) {
    return { helperPath, updated: false };
  }

  // Add execute for user/group/other while keeping the existing bits.
  applyMode(helperPath, permissionBits | 0o111);
  return { helperPath, updated: true };
}
|
|
193
|
+
|
|
194
|
+
/**
 * Trim a string value, treating non-strings and blank strings as absent.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | null} The trimmed string, or `null` when `value` is not
 *   a string or is empty after trimming.
 */
function normalizeOptionalString(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? null : trimmed;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Parse a value as a base-10 integer and accept it only when it is > 0.
 *
 * @param {unknown} value - Value to parse; `null`/`undefined` are treated as
 *   an empty string.
 * @param {*} fallback - Returned when parsing does not yield a positive
 *   finite integer.
 * @returns {*} The parsed integer or `fallback`.
 */
function normalizePositiveInt(value, fallback) {
  const text = String(value ?? "");
  const candidate = Number.parseInt(text, 10);
  const usable = Number.isFinite(candidate) && candidate > 0;
  return usable ? candidate : fallback;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Accept a launch config only when it is a non-array object.
 *
 * @param {unknown} value - Raw launch configuration.
 * @returns {object} `value` itself when it is a truthy non-array object,
 *   otherwise a fresh empty object.
 */
function normalizeLaunchConfig(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  return isRecord ? value : {};
}
|
|
216
|
+
|
|
217
|
+
/**
 * Build a string-only environment map from a loosely typed object.
 *
 * String values are copied as-is, numbers and booleans are stringified, and
 * every other value type is dropped.
 *
 * @param {unknown} value - Raw `env` object from a launch config.
 * @returns {Object<string, string>} Sanitized environment entries; an empty
 *   object when `value` is not a non-array object.
 */
function normalizeTerminalEnv(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return {};
  }
  return Object.entries(value).reduce((collected, [name, raw]) => {
    const kind = typeof raw;
    if (kind === "string") {
      collected[name] = raw;
    } else if (kind === "number" || kind === "boolean") {
      collected[name] = String(raw);
    }
    return collected;
  }, {});
}
|
|
233
|
+
|
|
125
234
|
export function startDaemon(config = {}, deps = {}) {
|
|
126
235
|
const exitFn = deps.exit || process.exit;
|
|
127
236
|
const killFn = deps.kill || process.kill;
|
|
@@ -230,6 +339,34 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
230
339
|
process.env.CONDUCTOR_SHUTDOWN_DISCONNECT_TIMEOUT_MS,
|
|
231
340
|
1000,
|
|
232
341
|
);
|
|
342
|
+
const DAEMON_WATCHDOG_INTERVAL_MS = parsePositiveInt(
|
|
343
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_INTERVAL_MS,
|
|
344
|
+
30_000,
|
|
345
|
+
);
|
|
346
|
+
const DAEMON_WATCHDOG_STALE_WS_MS = parsePositiveInt(
|
|
347
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_STALE_WS_MS,
|
|
348
|
+
75_000,
|
|
349
|
+
);
|
|
350
|
+
const DAEMON_WATCHDOG_CONNECT_GRACE_MS = parsePositiveInt(
|
|
351
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_CONNECT_GRACE_MS,
|
|
352
|
+
35_000,
|
|
353
|
+
);
|
|
354
|
+
const DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS = parsePositiveInt(
|
|
355
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS,
|
|
356
|
+
45_000,
|
|
357
|
+
);
|
|
358
|
+
const DAEMON_WATCHDOG_HTTP_TIMEOUT_MS = parsePositiveInt(
|
|
359
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_HTTP_TIMEOUT_MS,
|
|
360
|
+
5_000,
|
|
361
|
+
);
|
|
362
|
+
const DAEMON_WATCHDOG_MAX_SELF_HEALS = parsePositiveInt(
|
|
363
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_MAX_SELF_HEALS,
|
|
364
|
+
3,
|
|
365
|
+
);
|
|
366
|
+
const TERMINAL_RING_BUFFER_MAX_BYTES = parsePositiveInt(
|
|
367
|
+
config.TERMINAL_RING_BUFFER_MAX_BYTES || process.env.CONDUCTOR_TERMINAL_RING_BUFFER_MAX_BYTES,
|
|
368
|
+
DEFAULT_TERMINAL_RING_BUFFER_MAX_BYTES,
|
|
369
|
+
);
|
|
233
370
|
|
|
234
371
|
try {
|
|
235
372
|
mkdirSyncFn(WORKSPACE_ROOT, { recursive: true });
|
|
@@ -363,19 +500,45 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
363
500
|
|
|
364
501
|
let disconnectedSinceLastConnectedLog = false;
|
|
365
502
|
let didRecoverStaleTasks = false;
|
|
503
|
+
let daemonShuttingDown = false;
|
|
366
504
|
const activeTaskProcesses = new Map();
|
|
505
|
+
const activePtySessions = new Map();
|
|
367
506
|
const suppressedExitStatusReports = new Set();
|
|
368
507
|
const seenCommandRequestIds = new Set();
|
|
508
|
+
let lastConnectedAt = null;
|
|
509
|
+
let lastPongAt = null;
|
|
510
|
+
let lastInboundAt = null;
|
|
511
|
+
let lastSuccessfulHttpAt = null;
|
|
512
|
+
let lastPresenceCheckAt = null;
|
|
513
|
+
let lastPresenceConfirmedAt = null;
|
|
514
|
+
let wsConnected = false;
|
|
515
|
+
let watchdogLastHealAt = 0;
|
|
516
|
+
let watchdogHealAttempts = 0;
|
|
517
|
+
let watchdogProbeInFlight = false;
|
|
518
|
+
let watchdogLastProbeErrorAt = 0;
|
|
519
|
+
let watchdogLastPresenceMismatchAt = 0;
|
|
520
|
+
let watchdogAwaitingHealthySignalAt = null;
|
|
521
|
+
let watchdogTimer = null;
|
|
369
522
|
const logCollector = createLogCollector(BACKEND_HTTP);
|
|
523
|
+
const createPtyFn = deps.createPty || defaultCreatePty;
|
|
370
524
|
const client = createWebSocketClient(sdkConfig, {
|
|
371
525
|
extraHeaders: {
|
|
372
526
|
"x-conductor-host": AGENT_NAME,
|
|
373
527
|
"x-conductor-backends": SUPPORTED_BACKENDS.join(","),
|
|
528
|
+
"x-conductor-capabilities": "pty_task",
|
|
374
529
|
},
|
|
375
|
-
onConnected: ({ isReconnect } = { isReconnect: false }) => {
|
|
530
|
+
onConnected: ({ isReconnect, connectedAt } = { isReconnect: false, connectedAt: Date.now() }) => {
|
|
531
|
+
wsConnected = true;
|
|
532
|
+
lastConnectedAt = connectedAt || Date.now();
|
|
533
|
+
lastPongAt = lastPongAt && lastPongAt > lastConnectedAt ? lastPongAt : lastConnectedAt;
|
|
376
534
|
if (!isReconnect || disconnectedSinceLastConnectedLog) {
|
|
377
535
|
log("Connected to backend");
|
|
378
536
|
}
|
|
537
|
+
if (watchdogHealAttempts > 0) {
|
|
538
|
+
watchdogAwaitingHealthySignalAt = lastConnectedAt;
|
|
539
|
+
} else {
|
|
540
|
+
watchdogAwaitingHealthySignalAt = null;
|
|
541
|
+
}
|
|
379
542
|
disconnectedSinceLastConnectedLog = false;
|
|
380
543
|
sendAgentResume(isReconnect).catch((error) => {
|
|
381
544
|
logError(`sendAgentResume failed: ${error?.message || error}`);
|
|
@@ -391,8 +554,24 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
391
554
|
});
|
|
392
555
|
}
|
|
393
556
|
},
|
|
394
|
-
onDisconnected: () => {
|
|
557
|
+
onDisconnected: (event = {}) => {
|
|
558
|
+
wsConnected = false;
|
|
395
559
|
disconnectedSinceLastConnectedLog = true;
|
|
560
|
+
if (!daemonShuttingDown) {
|
|
561
|
+
logError(
|
|
562
|
+
`[daemon-ws] Disconnected from backend: ${formatDisconnectDiagnostics(event)} (${formatDaemonHealthState({
|
|
563
|
+
connectedAt: lastConnectedAt,
|
|
564
|
+
lastPongAt,
|
|
565
|
+
lastInboundAt,
|
|
566
|
+
lastSuccessfulHttpAt,
|
|
567
|
+
lastPresenceConfirmedAt,
|
|
568
|
+
})})`,
|
|
569
|
+
);
|
|
570
|
+
}
|
|
571
|
+
},
|
|
572
|
+
onPong: ({ at }) => {
|
|
573
|
+
lastPongAt = at;
|
|
574
|
+
markWatchdogHealthy("pong", at);
|
|
396
575
|
},
|
|
397
576
|
});
|
|
398
577
|
|
|
@@ -404,6 +583,169 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
404
583
|
logError(`Failed to connect: ${err}`);
|
|
405
584
|
});
|
|
406
585
|
|
|
586
|
+
watchdogTimer = setInterval(() => {
|
|
587
|
+
void runDaemonWatchdog();
|
|
588
|
+
}, DAEMON_WATCHDOG_INTERVAL_MS);
|
|
589
|
+
if (typeof watchdogTimer?.unref === "function") {
|
|
590
|
+
watchdogTimer.unref();
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
/**
 * Record the time of the most recent successful backend HTTP call.
 *
 * @param {number} [at=Date.now()] - Timestamp to store in
 *   `lastSuccessfulHttpAt`.
 */
function markBackendHttpSuccess(at = Date.now()) {
  lastSuccessfulHttpAt = at;
}
|
|
596
|
+
|
|
597
|
+
/**
 * Ask the backend's `/api/agents` endpoint whether this host is listed.
 *
 * Updates `lastPresenceCheckAt` on every call, records HTTP success on an OK
 * response, and updates `lastPresenceConfirmedAt` when an entry's trimmed
 * `host` equals `AGENT_NAME`. Never throws; failures are reported in the
 * returned object.
 *
 * @returns {Promise<object>} `{ ok: true, selfOnline, agentCount }` on
 *   success, or `{ ok: false, status, error }` on an HTTP or transport
 *   failure (`status` is `null` for thrown errors).
 */
async function probeAgentPresence() {
  lastPresenceCheckAt = Date.now();
  const failure = (status, error) => ({ ok: false, status, error });

  try {
    const pendingRequest = fetchFn(`${BACKEND_HTTP}/api/agents`, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${AGENT_TOKEN}`,
        Accept: "application/json",
      },
    });
    const response = await withTimeout(
      pendingRequest,
      DAEMON_WATCHDOG_HTTP_TIMEOUT_MS,
      "daemon agent presence probe",
    );
    if (!response.ok) {
      return failure(response.status, `HTTP ${response.status}`);
    }

    // The HTTP round-trip succeeded even if the body turns out unusable.
    const succeededAt = Date.now();
    markBackendHttpSuccess(succeededAt);

    const body = await response.json();
    const agents = Array.isArray(body) ? body : [];
    let selfOnline = false;
    for (const entry of agents) {
      if (String(entry?.host || "").trim() === AGENT_NAME) {
        selfOnline = true;
        break;
      }
    }
    if (selfOnline) {
      lastPresenceConfirmedAt = succeededAt;
    }
    return { ok: true, selfOnline, agentCount: agents.length };
  } catch (error) {
    return failure(null, error?.message || String(error));
  }
}
|
|
639
|
+
|
|
640
|
+
/**
 * Attempt to recover an unhealthy daemon websocket, or shut the daemon down
 * once the self-heal budget is exhausted.
 *
 * @param {string} reason - Short label included in log lines and in the
 *   `forceReconnect` argument.
 * @param {object} [extra] - Additional diagnostics rendered through
 *   `formatWatchdogExtra`.
 */
function requestWatchdogSelfHeal(reason, extra = {}) {
  // Nothing to heal while shutting down or while the socket is already down.
  if (daemonShuttingDown || !wsConnected) {
    return;
  }
  const now = Date.now();
  // Rate-limit heals: skip when the previous attempt is within the cooldown.
  if (watchdogLastHealAt && now - watchdogLastHealAt < DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS) {
    return;
  }
  watchdogLastHealAt = now;
  watchdogHealAttempts += 1;
  logError(
    `[watchdog] ${reason}; restarting daemon websocket (${watchdogHealAttempts}/${DAEMON_WATCHDOG_MAX_SELF_HEALS}) ${formatWatchdogExtra(extra)} (${formatDaemonHealthState({
      connectedAt: lastConnectedAt,
      lastPongAt,
      lastInboundAt,
      lastSuccessfulHttpAt,
      lastPresenceConfirmedAt,
    })})`,
  );
  // Budget exceeded: request shutdown, then release the lock and exit with
  // code 1 whether or not the shutdown request succeeded.
  if (watchdogHealAttempts > DAEMON_WATCHDOG_MAX_SELF_HEALS) {
    daemonShuttingDown = true;
    logError("[watchdog] Self-heal budget exceeded; exiting daemon for supervisor restart");
    void requestShutdown("watchdog self-heal budget exceeded")
      .catch((error) => {
        logError(`watchdog shutdown failed: ${error?.message || error}`);
      })
      .finally(() => {
        cleanupLock();
        exitFn(1);
      });
    return;
  }
  // Mark the connection as down locally before triggering the reconnect.
  watchdogAwaitingHealthySignalAt = null;
  wsConnected = false;
  disconnectedSinceLastConnectedLog = true;
  // Prefer the client's own forceReconnect when it provides one.
  if (typeof client.forceReconnect === "function") {
    Promise.resolve(client.forceReconnect(`watchdog:${reason}`)).catch((error) => {
      logError(`watchdog forceReconnect failed: ${error?.message || error}`);
    });
    return;
  }
  // Fallback: disconnect, then connect again even if the disconnect failed.
  Promise.resolve(client.disconnect())
    .catch((error) => {
      logError(`watchdog disconnect failed: ${error?.message || error}`);
    })
    .finally(() => {
      client.connect().catch((error) => {
        logError(`watchdog reconnect failed: ${error?.message || error}`);
      });
    });
}
|
|
691
|
+
|
|
692
|
+
/**
 * One watchdog tick: probe backend presence and request a self-heal when the
 * websocket has shown no sign of life for too long.
 *
 * Skips the tick while shutting down, while disconnected, while a previous
 * probe is still running, or during the post-connect grace period.
 *
 * @returns {Promise<void>}
 */
async function runDaemonWatchdog() {
  if (daemonShuttingDown || !wsConnected || watchdogProbeInFlight) {
    return;
  }
  const startedAt = Date.now();
  // Give a fresh connection time to settle before judging its health.
  if (!lastConnectedAt || startedAt - lastConnectedAt < DAEMON_WATCHDOG_CONNECT_GRACE_MS) {
    return;
  }
  watchdogProbeInFlight = true;
  try {
    const probe = await probeAgentPresence();
    const now = Date.now();
    // Most recent websocket activity: a pong, an inbound message, or the connect itself.
    const lastWsHealthAt = Math.max(lastPongAt || 0, lastInboundAt || 0, lastConnectedAt || 0);
    const staleWs = !lastWsHealthAt || now - lastWsHealthAt > DAEMON_WATCHDOG_STALE_WS_MS;

    if (!probe.ok) {
      // Throttle probe-failure logging to once per cooldown window.
      if (now - watchdogLastProbeErrorAt >= DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS) {
        watchdogLastProbeErrorAt = now;
        logError(`[watchdog] agent presence probe failed: ${probe.error}`);
      }
      if (staleWs) {
        requestWatchdogSelfHeal("stale_ws_health", {
          probeAt: lastPresenceCheckAt,
          probeStatus: probe.status,
          probeError: probe.error,
          lastWsHealthAt,
          staleForMs: now - lastWsHealthAt,
        });
      }
      return;
    }

    // A missing host entry is only logged (throttled); it never triggers a heal by itself.
    if (!probe.selfOnline && now - watchdogLastPresenceMismatchAt >= DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS) {
      watchdogLastPresenceMismatchAt = now;
      logError(`[watchdog] agent presence probe did not include current host; skipping self-heal to avoid false positives on non-sticky HTTP/WS deployments (${formatWatchdogExtra({
        agentCount: probe.agentCount,
        probeAt: lastPresenceCheckAt,
      })})`);
    }

    if (staleWs) {
      requestWatchdogSelfHeal("stale_ws_health", {
        agentCount: probe.agentCount,
        lastWsHealthAt,
        staleForMs: now - lastWsHealthAt,
        probeAt: lastPresenceCheckAt,
      });
    }
  } finally {
    // Always release the in-flight guard so later ticks can run.
    watchdogProbeInFlight = false;
  }
}
|
|
744
|
+
|
|
745
|
+
/**
 * List the ids of all tasks currently tracked by the daemon, without
 * duplicates: keys of `activeTaskProcesses` first, then any additional keys
 * of `activePtySessions`.
 *
 * @returns {Array} De-duplicated task ids.
 */
const getActiveTaskIds = () => {
  const ids = new Set(activeTaskProcesses.keys());
  for (const ptyTaskId of activePtySessions.keys()) {
    ids.add(ptyTaskId);
  }
  return [...ids];
};
|
|
748
|
+
|
|
407
749
|
async function recoverStaleTasks() {
|
|
408
750
|
try {
|
|
409
751
|
const response = await fetchFn(`${BACKEND_HTTP}/api/tasks`, {
|
|
@@ -417,6 +759,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
417
759
|
logError(`Failed to recover stale tasks: HTTP ${response.status}`);
|
|
418
760
|
return;
|
|
419
761
|
}
|
|
762
|
+
markBackendHttpSuccess();
|
|
420
763
|
|
|
421
764
|
const tasks = await response.json();
|
|
422
765
|
if (!Array.isArray(tasks) || tasks.length === 0) {
|
|
@@ -448,6 +791,8 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
448
791
|
});
|
|
449
792
|
if (!patchResp.ok) {
|
|
450
793
|
logError(`Failed to mark stale task ${taskId} as killed: HTTP ${patchResp.status}`);
|
|
794
|
+
} else {
|
|
795
|
+
markBackendHttpSuccess();
|
|
451
796
|
}
|
|
452
797
|
}),
|
|
453
798
|
);
|
|
@@ -471,11 +816,12 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
471
816
|
logError(`Failed to reconcile tasks: HTTP ${response.status}`);
|
|
472
817
|
return;
|
|
473
818
|
}
|
|
819
|
+
markBackendHttpSuccess();
|
|
474
820
|
const tasks = await response.json();
|
|
475
821
|
if (!Array.isArray(tasks)) {
|
|
476
822
|
return;
|
|
477
823
|
}
|
|
478
|
-
const localTaskIds = new Set(
|
|
824
|
+
const localTaskIds = new Set(getActiveTaskIds());
|
|
479
825
|
const assigned = tasks.filter((task) => {
|
|
480
826
|
const agentHost = String(task?.agent_host || "").trim();
|
|
481
827
|
const status = String(task?.status || "").trim().toLowerCase();
|
|
@@ -500,6 +846,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
500
846
|
});
|
|
501
847
|
if (patchResp.ok) {
|
|
502
848
|
killedCount += 1;
|
|
849
|
+
markBackendHttpSuccess();
|
|
503
850
|
} else {
|
|
504
851
|
logError(`Failed to reconcile stale task ${taskId}: HTTP ${patchResp.status}`);
|
|
505
852
|
}
|
|
@@ -519,7 +866,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
519
866
|
await client.sendJson({
|
|
520
867
|
type: "agent_resume",
|
|
521
868
|
payload: {
|
|
522
|
-
active_tasks:
|
|
869
|
+
active_tasks: getActiveTaskIds(),
|
|
523
870
|
source: "conductor-daemon",
|
|
524
871
|
metadata: { is_reconnect: Boolean(isReconnect) },
|
|
525
872
|
},
|
|
@@ -554,7 +901,446 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
554
901
|
});
|
|
555
902
|
}
|
|
556
903
|
|
|
904
|
+
/**
 * Send a terminal event envelope over the daemon websocket client.
 *
 * @param {string} type - Event type placed in the envelope's `type` field.
 * @param {object} payload - Event payload placed in the `payload` field.
 * @returns {*} Whatever `client.sendJson` returns.
 */
function sendTerminalEvent(type, payload) {
  const envelope = { type, payload };
  return client.sendJson(envelope);
}
|
|
910
|
+
|
|
911
|
+
/**
 * Turn a raw launch config into the concrete command, arguments and terminal
 * settings used to spawn a PTY.
 *
 * Entrypoint types:
 *  - `"tool_preset"`: runs the allow-listed CLI command through the shell
 *    with `-lc`.
 *  - `"custom"`: runs `launch_config.command` with its string `args`.
 *  - anything else: starts the shell with `-l`.
 *
 * When no entrypoint type is given it defaults to `"tool_preset"` if a preset
 * name is present, otherwise `"shell"`. Both snake_case and camelCase keys
 * are accepted for the entrypoint type and the tool preset.
 *
 * @param {unknown} launchConfig - Raw launch configuration from the request.
 * @param {string} fallbackCwd - Working directory used when the config has
 *   no `cwd`.
 * @returns {object} `{ entrypointType, toolPreset, command, args, shell, cwd,
 *   env, cols, rows }`.
 * @throws {Error} When the tool preset is not in `ALLOW_CLI_LIST`, or when a
 *   custom entrypoint has no `command`.
 */
function resolvePtyLaunchSpec(launchConfig, fallbackCwd) {
  const normalizedLaunchConfig = normalizeLaunchConfig(launchConfig);
  const requestedPreset =
    normalizeOptionalString(normalizedLaunchConfig.tool_preset) ||
    normalizeOptionalString(normalizedLaunchConfig.toolPreset);
  const entrypointType =
    normalizeOptionalString(normalizedLaunchConfig.entrypoint_type) ||
    normalizeOptionalString(normalizedLaunchConfig.entrypointType) ||
    (requestedPreset ? "tool_preset" : "shell");
  const preferredShell =
    normalizeOptionalString(normalizedLaunchConfig.shell) ||
    process.env.SHELL ||
    "/bin/zsh";

  // Fields shared by every entrypoint type, kept in the original key order.
  const sharedSpec = {
    shell: preferredShell,
    cwd: normalizeOptionalString(normalizedLaunchConfig.cwd) || fallbackCwd,
    env: normalizeTerminalEnv(normalizedLaunchConfig.env),
    cols: normalizePositiveInt(
      normalizedLaunchConfig.cols ?? normalizedLaunchConfig.columns,
      DEFAULT_TERMINAL_COLS,
    ),
    rows: normalizePositiveInt(normalizedLaunchConfig.rows, DEFAULT_TERMINAL_ROWS),
  };

  if (entrypointType === "tool_preset") {
    const toolPreset = requestedPreset || SUPPORTED_BACKENDS[0] || "codex";
    // Own-property lookup only: a preset named e.g. "constructor" or
    // "toString" must not resolve to an inherited Object.prototype member.
    const cliCommand = Object.prototype.hasOwnProperty.call(ALLOW_CLI_LIST, toolPreset)
      ? ALLOW_CLI_LIST[toolPreset]
      : undefined;
    if (!cliCommand) {
      throw new Error(`Unsupported tool preset: ${toolPreset}`);
    }
    return {
      entrypointType,
      toolPreset,
      command: preferredShell,
      args: ["-lc", cliCommand],
      ...sharedSpec,
    };
  }

  if (entrypointType === "custom") {
    const command = normalizeOptionalString(normalizedLaunchConfig.command);
    if (!command) {
      throw new Error("launch_config.command is required for custom entrypoint");
    }
    // Non-string arguments are dropped rather than coerced.
    const args = Array.isArray(normalizedLaunchConfig.args)
      ? normalizedLaunchConfig.args.filter((value) => typeof value === "string")
      : [];
    return {
      entrypointType,
      toolPreset: null,
      command,
      args,
      ...sharedSpec,
    };
  }

  // Any other entrypoint type falls back to a login shell.
  return {
    entrypointType: "shell",
    toolPreset: null,
    command: preferredShell,
    args: ["-l"],
    ...sharedSpec,
  };
}
|
|
993
|
+
|
|
994
|
+
/**
 * Measure how many bytes a terminal chunk occupies when encoded as UTF-8.
 *
 * @param {string} data - Terminal output chunk.
 * @returns {number} UTF-8 byte length of `data`.
 */
function getTerminalChunkByteLength(data) {
  const byteCount = Buffer.byteLength(data, "utf8");
  return byteCount;
}
|
|
997
|
+
|
|
998
|
+
/**
 * Keep at most the last `maxBytes` UTF-8 bytes of a chunk without splitting a
 * multi-byte character.
 *
 * @param {string} data - Terminal output chunk.
 * @param {number} maxBytes - Maximum number of UTF-8 bytes to keep.
 * @returns {string} `data` unchanged when it already fits; otherwise the tail
 *   of `data`, starting at the first byte that is not a UTF-8 continuation
 *   byte.
 */
function trimTerminalChunkToTailBytes(data, maxBytes) {
  const bytes = Buffer.from(data, "utf8");
  if (bytes.length <= maxBytes) {
    return data;
  }
  const tail = bytes.subarray(bytes.length - maxBytes);
  // Continuation bytes look like 10xxxxxx; skip any that lead the tail so the
  // result starts on a character boundary.
  const firstBoundary = tail.findIndex((byte) => (byte & 0xc0) !== 0x80);
  const start = firstBoundary === -1 ? tail.length : firstBoundary;
  return tail.subarray(start).toString("utf8");
}
|
|
1010
|
+
|
|
1011
|
+
/**
 * Append a terminal chunk to a session's ring buffer and assign it the next
 * sequence number.
 *
 * A chunk larger than `TERMINAL_RING_BUFFER_MAX_BYTES` is cut down to its
 * tail before being stored; older entries are then evicted from the front
 * until the buffered total no longer exceeds that limit.
 *
 * @param {object} record - PTY session record; `outputSeq`, `ringBuffer` and
 *   `ringBufferByteLength` are updated in place.
 * @param {*} data - Output chunk; non-strings are stringified, with
 *   `null`/`undefined` treated as an empty string.
 * @returns {number} The sequence number assigned to this chunk.
 */
function bufferTerminalOutput(record, data) {
  record.outputSeq += 1;

  let text = typeof data === "string" ? data : String(data ?? "");
  let size = getTerminalChunkByteLength(text);
  const oversized = size > TERMINAL_RING_BUFFER_MAX_BYTES;
  if (oversized) {
    text = trimTerminalChunkToTailBytes(text, TERMINAL_RING_BUFFER_MAX_BYTES);
    size = getTerminalChunkByteLength(text);
  }

  const entry = { seq: record.outputSeq, data: text, byteLength: size };
  record.ringBuffer.push(entry);
  record.ringBufferByteLength += size;

  // Evict oldest entries until the buffer is back within its byte budget.
  while (
    record.ringBufferByteLength > TERMINAL_RING_BUFFER_MAX_BYTES &&
    record.ringBuffer.length > 0
  ) {
    const evicted = record.ringBuffer.shift();
    record.ringBufferByteLength -= evicted?.byteLength ?? 0;
  }
  return record.outputSeq;
}
|
|
1027
|
+
|
|
1028
|
+
/**
 * Wire a PTY's data and exit callbacks to the log file, the replay buffer and
 * the backend event stream.
 *
 * @param {string} taskId - Task the PTY belongs to.
 * @param {object} record - PTY session record (reads `pty`, `logStream`,
 *   `projectId`, `ptySessionId`, `stopForceKillTimer`, `outputSeq`).
 */
function attachPtyStreamHandlers(taskId, record) {
  // Each chunk goes to the log file first, then the ring buffer, then the backend.
  const handleData = (chunk) => {
    if (record.logStream) {
      record.logStream.write(chunk);
    }
    const seq = bufferTerminalOutput(record, chunk);
    const outputEvent = {
      task_id: taskId,
      project_id: record.projectId,
      pty_session_id: record.ptySessionId,
      seq,
      data: chunk,
    };
    sendTerminalEvent("terminal_output", outputEvent).catch((err) => {
      logError(`Failed to report terminal_output for ${taskId}: ${err?.message || err}`);
    });
  };

  // On exit: cancel any pending force-kill, forget the session, close the
  // log with a final marker line, and report the exit to the backend.
  const handleExit = ({ exitCode, signal }) => {
    if (record.stopForceKillTimer) {
      clearTimeout(record.stopForceKillTimer);
    }
    activePtySessions.delete(taskId);

    const codeText = exitCode ?? "null";
    const signalText = signal ?? "null";
    if (record.logStream) {
      // sv-SE locale in the Asia/Shanghai zone; the space separator becomes "T".
      const ts = new Date().toLocaleString("sv-SE", { timeZone: "Asia/Shanghai" }).replace(" ", "T");
      record.logStream.write(
        `[daemon ${ts}] pty exited exitCode=${codeText} signal=${signalText}\n`,
      );
      record.logStream.end();
    }

    const closedAt = new Date().toISOString();
    log(`PTY task ${taskId} exited with code=${codeText} signal=${signalText}`);
    const exitEvent = {
      task_id: taskId,
      project_id: record.projectId,
      pty_session_id: record.ptySessionId,
      exit_code: exitCode ?? null,
      signal: signal ?? null,
      seq: record.outputSeq,
      closed_at: closedAt,
    };
    sendTerminalEvent("terminal_exit", exitEvent).catch((err) => {
      logError(`Failed to report terminal_exit for ${taskId}: ${err?.message || err}`);
    });
  };

  record.pty.onData(handleData);
  record.pty.onExit(handleExit);
}
|
|
1076
|
+
|
|
1077
|
+
/**
 * Update a PTY session's recorded dimensions and resize the PTY when it
 * supports resizing.
 *
 * @param {object} record - PTY session record; `cols` and `rows` are updated.
 * @param {*} cols - Requested columns; falls back to the recorded value, then
 *   `DEFAULT_TERMINAL_COLS`, when not a positive integer.
 * @param {*} rows - Requested rows; falls back to the recorded value, then
 *   `DEFAULT_TERMINAL_ROWS`, when not a positive integer.
 */
function resizePty(record, cols, rows) {
  const size = {
    cols: normalizePositiveInt(cols, record.cols || DEFAULT_TERMINAL_COLS),
    rows: normalizePositiveInt(rows, record.rows || DEFAULT_TERMINAL_ROWS),
  };
  Object.assign(record, size);
  if (typeof record.pty.resize !== "function") {
    return;
  }
  record.pty.resize(size.cols, size.rows);
}
|
|
1086
|
+
|
|
1087
|
+
async function handleCreatePtyTask(payload) {
|
|
1088
|
+
const taskId = payload?.task_id ? String(payload.task_id) : "";
|
|
1089
|
+
const projectId = payload?.project_id ? String(payload.project_id) : "";
|
|
1090
|
+
const ptySessionId = payload?.pty_session_id ? String(payload.pty_session_id) : "";
|
|
1091
|
+
const requestId = payload?.request_id ? String(payload.request_id) : "";
|
|
1092
|
+
const launchConfig = normalizeLaunchConfig(payload?.launch_config);
|
|
1093
|
+
|
|
1094
|
+
if (!taskId || !projectId || !ptySessionId) {
|
|
1095
|
+
logError(`Invalid create_pty_task payload: ${JSON.stringify(payload)}`);
|
|
1096
|
+
sendAgentCommandAck({
|
|
1097
|
+
requestId,
|
|
1098
|
+
taskId,
|
|
1099
|
+
eventType: "create_pty_task",
|
|
1100
|
+
accepted: false,
|
|
1101
|
+
}).catch(() => {});
|
|
1102
|
+
return;
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
if (requestId && !markRequestSeen(requestId)) {
|
|
1106
|
+
log(`Duplicate create_pty_task ignored for ${taskId} (request_id=${requestId})`);
|
|
1107
|
+
sendAgentCommandAck({
|
|
1108
|
+
requestId,
|
|
1109
|
+
taskId,
|
|
1110
|
+
eventType: "create_pty_task",
|
|
1111
|
+
accepted: true,
|
|
1112
|
+
}).catch(() => {});
|
|
1113
|
+
return;
|
|
1114
|
+
}
|
|
1115
|
+
|
|
1116
|
+
if (activeTaskProcesses.has(taskId) || activePtySessions.has(taskId)) {
|
|
1117
|
+
log(`Duplicate create_pty_task ignored for ${taskId}: task already active`);
|
|
1118
|
+
sendAgentCommandAck({
|
|
1119
|
+
requestId,
|
|
1120
|
+
taskId,
|
|
1121
|
+
eventType: "create_pty_task",
|
|
1122
|
+
accepted: true,
|
|
1123
|
+
}).catch(() => {});
|
|
1124
|
+
return;
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
let boundPath = await getProjectLocalPath(projectId);
|
|
1128
|
+
let taskDir = normalizeOptionalString(launchConfig.cwd) || boundPath;
|
|
1129
|
+
if (!taskDir) {
|
|
1130
|
+
const now = new Date();
|
|
1131
|
+
const dayDir = path.join(WORKSPACE_ROOT, formatWorkspaceDate(now));
|
|
1132
|
+
const runTimestampPart = formatWorkspaceRunTimestamp(now);
|
|
1133
|
+
const taskSuffix = taskId.replace(/[^a-zA-Z0-9]/g, "").slice(0, 8) || String(process.pid);
|
|
1134
|
+
// PTY login shells can exit immediately if their cwd is renamed right after spawn.
|
|
1135
|
+
const pendingRunDir = `${runTimestampPart}_pty_${taskSuffix}`;
|
|
1136
|
+
taskDir = path.join(dayDir, pendingRunDir);
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
try {
|
|
1140
|
+
mkdirSyncFn(taskDir, { recursive: true });
|
|
1141
|
+
} catch (err) {
|
|
1142
|
+
logError(`Failed to create PTY workspace ${taskDir}: ${err?.message || err}`);
|
|
1143
|
+
sendAgentCommandAck({
|
|
1144
|
+
requestId,
|
|
1145
|
+
taskId,
|
|
1146
|
+
eventType: "create_pty_task",
|
|
1147
|
+
accepted: false,
|
|
1148
|
+
}).catch(() => {});
|
|
1149
|
+
return;
|
|
1150
|
+
}
|
|
1151
|
+
|
|
1152
|
+
let launchSpec;
|
|
1153
|
+
try {
|
|
1154
|
+
launchSpec = resolvePtyLaunchSpec(launchConfig, taskDir);
|
|
1155
|
+
} catch (error) {
|
|
1156
|
+
logError(`Failed to resolve PTY launch config for ${taskId}: ${error?.message || error}`);
|
|
1157
|
+
sendAgentCommandAck({
|
|
1158
|
+
requestId,
|
|
1159
|
+
taskId,
|
|
1160
|
+
eventType: "create_pty_task",
|
|
1161
|
+
accepted: false,
|
|
1162
|
+
}).catch(() => {});
|
|
1163
|
+
sendTerminalEvent("terminal_error", {
|
|
1164
|
+
task_id: taskId,
|
|
1165
|
+
project_id: projectId,
|
|
1166
|
+
pty_session_id: ptySessionId,
|
|
1167
|
+
message: error?.message || String(error),
|
|
1168
|
+
}).catch(() => {});
|
|
1169
|
+
return;
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
sendAgentCommandAck({
|
|
1173
|
+
requestId,
|
|
1174
|
+
taskId,
|
|
1175
|
+
eventType: "create_pty_task",
|
|
1176
|
+
accepted: true,
|
|
1177
|
+
}).catch((err) => {
|
|
1178
|
+
logError(`Failed to report agent_command_ack(create_pty_task) for ${taskId}: ${err?.message || err}`);
|
|
1179
|
+
});
|
|
1180
|
+
|
|
1181
|
+
const env = {
|
|
1182
|
+
...process.env,
|
|
1183
|
+
...launchSpec.env,
|
|
1184
|
+
CONDUCTOR_PROJECT_ID: projectId,
|
|
1185
|
+
CONDUCTOR_TASK_ID: taskId,
|
|
1186
|
+
CONDUCTOR_PTY_SESSION_ID: ptySessionId,
|
|
1187
|
+
};
|
|
1188
|
+
if (config.CONFIG_FILE) {
|
|
1189
|
+
env.CONDUCTOR_CONFIG = config.CONFIG_FILE;
|
|
1190
|
+
}
|
|
1191
|
+
if (AGENT_TOKEN) {
|
|
1192
|
+
env.CONDUCTOR_AGENT_TOKEN = AGENT_TOKEN;
|
|
1193
|
+
}
|
|
1194
|
+
if (BACKEND_HTTP) {
|
|
1195
|
+
env.CONDUCTOR_BACKEND_URL = BACKEND_HTTP;
|
|
1196
|
+
}
|
|
1197
|
+
|
|
1198
|
+
const logPath = path.join(launchSpec.cwd, "conductor-terminal.log");
|
|
1199
|
+
let logStream;
|
|
1200
|
+
try {
|
|
1201
|
+
logStream = createWriteStreamFn(logPath, { flags: "a" });
|
|
1202
|
+
if (logStream && typeof logStream.on === "function") {
|
|
1203
|
+
const logPathSnapshot = logPath;
|
|
1204
|
+
logStream.on("error", (err) => {
|
|
1205
|
+
logError(`Terminal log stream error (${logPathSnapshot}): ${err?.message || err}`);
|
|
1206
|
+
});
|
|
1207
|
+
}
|
|
1208
|
+
} catch (err) {
|
|
1209
|
+
logError(`Failed to open PTY log file ${logPath}: ${err?.message || err}`);
|
|
1210
|
+
}
|
|
1211
|
+
|
|
1212
|
+
try {
|
|
1213
|
+
const pty = await createPtyFn(launchSpec.command, launchSpec.args, {
|
|
1214
|
+
name: "xterm-256color",
|
|
1215
|
+
cols: launchSpec.cols,
|
|
1216
|
+
rows: launchSpec.rows,
|
|
1217
|
+
cwd: launchSpec.cwd,
|
|
1218
|
+
env,
|
|
1219
|
+
});
|
|
1220
|
+
const resolvedLogPath = path.join(taskDir, "conductor-terminal.log");
|
|
1221
|
+
|
|
1222
|
+
const startedAt = new Date().toISOString();
|
|
1223
|
+
const record = {
|
|
1224
|
+
kind: "pty",
|
|
1225
|
+
pty,
|
|
1226
|
+
ptySessionId,
|
|
1227
|
+
projectId,
|
|
1228
|
+
taskDir,
|
|
1229
|
+
logPath: resolvedLogPath,
|
|
1230
|
+
logStream,
|
|
1231
|
+
cols: launchSpec.cols,
|
|
1232
|
+
rows: launchSpec.rows,
|
|
1233
|
+
shell: launchSpec.shell,
|
|
1234
|
+
startedAt,
|
|
1235
|
+
outputSeq: 0,
|
|
1236
|
+
ringBuffer: [],
|
|
1237
|
+
ringBufferByteLength: 0,
|
|
1238
|
+
stopForceKillTimer: null,
|
|
1239
|
+
};
|
|
1240
|
+
activePtySessions.set(taskId, record);
|
|
1241
|
+
attachPtyStreamHandlers(taskId, record);
|
|
1242
|
+
|
|
1243
|
+
log(`Created PTY task ${taskId} (${launchSpec.entrypointType}) cwd=${launchSpec.cwd}`);
|
|
1244
|
+
sendTerminalEvent("terminal_opened", {
|
|
1245
|
+
task_id: taskId,
|
|
1246
|
+
project_id: projectId,
|
|
1247
|
+
pty_session_id: ptySessionId,
|
|
1248
|
+
pid: Number.isInteger(pty?.pid) ? pty.pid : null,
|
|
1249
|
+
cwd: taskDir,
|
|
1250
|
+
shell: launchSpec.shell,
|
|
1251
|
+
cols: launchSpec.cols,
|
|
1252
|
+
rows: launchSpec.rows,
|
|
1253
|
+
started_at: startedAt,
|
|
1254
|
+
}).catch((err) => {
|
|
1255
|
+
logError(`Failed to report terminal_opened for ${taskId}: ${err?.message || err}`);
|
|
1256
|
+
});
|
|
1257
|
+
} catch (error) {
|
|
1258
|
+
if (logStream) {
|
|
1259
|
+
logStream.end();
|
|
1260
|
+
}
|
|
1261
|
+
logError(`Failed to create PTY task ${taskId}: ${error?.message || error}`);
|
|
1262
|
+
sendTerminalEvent("terminal_error", {
|
|
1263
|
+
task_id: taskId,
|
|
1264
|
+
project_id: projectId,
|
|
1265
|
+
pty_session_id: ptySessionId,
|
|
1266
|
+
message: error?.message || String(error),
|
|
1267
|
+
}).catch(() => {});
|
|
1268
|
+
}
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
/**
 * Handles a `terminal_attach` event: re-announces an existing PTY session and
 * replays buffered output the viewer has not seen yet.
 *
 * @param {object} payload - Event payload. Reads `task_id`, optional
 *   `pty_session_id`, optional `cols`/`rows`, and `last_seq` (or `lastSeq`).
 * @returns {Promise<void>} Resolves once the replay has been attempted. Send
 *   failures are logged (or ignored for `terminal_error`), never thrown.
 */
async function handleTerminalAttach(payload) {
  const taskId = payload?.task_id ? String(payload.task_id) : "";
  if (!taskId) return;

  const record = activePtySessions.get(taskId);
  if (!record) {
    // Unknown session: tell the viewer, best effort.
    sendTerminalEvent("terminal_error", {
      task_id: taskId,
      pty_session_id: payload?.pty_session_id ? String(payload.pty_session_id) : null,
      message: "terminal session not found",
    }).catch(() => {});
    return;
  }

  // Apply the viewer's size before reporting cols/rows back to it.
  if (payload?.cols || payload?.rows) {
    resizePty(record, payload?.cols, payload?.rows);
  }

  await sendTerminalEvent("terminal_opened", {
    task_id: taskId,
    project_id: record.projectId,
    pty_session_id: record.ptySessionId,
    pid: Number.isInteger(record.pty?.pid) ? record.pty.pid : null,
    cwd: record.taskDir,
    shell: record.shell,
    cols: record.cols,
    rows: record.rows,
    started_at: record.startedAt,
  }).catch((err) => {
    logError(`Failed to report terminal_opened on attach for ${taskId}: ${err?.message || err}`);
  });

  const lastSeq = normalizePositiveInt(payload?.last_seq ?? payload?.lastSeq, 0);

  // Snapshot the chunks to replay before awaiting. Array iteration is
  // index-based, so a buffer mutated between awaits would skip or repeat
  // chunks. NOTE(review): the ring buffer is presumably appended to and
  // trimmed by the PTY output handler, which is not visible here — confirm.
  const pendingChunks = record.ringBuffer.filter((chunk) => chunk.seq > lastSeq);
  for (const chunk of pendingChunks) {
    await sendTerminalEvent("terminal_output", {
      task_id: taskId,
      project_id: record.projectId,
      pty_session_id: record.ptySessionId,
      seq: chunk.seq,
      data: chunk.data,
    }).catch((err) => {
      logError(`Failed to replay terminal_output for ${taskId}: ${err?.message || err}`);
    });
  }
}
|
|
1316
|
+
|
|
1317
|
+
/**
 * Handles a `terminal_input` event by writing the data to the task's PTY.
 *
 * The event is ignored when `task_id` is missing, `data` is not a non-empty
 * string, no PTY session is registered for the task, or the session's PTY has
 * no `write` function.
 *
 * @param {object} payload - Event payload. Reads `task_id` and `data`.
 * @returns {void}
 */
function handleTerminalInput(payload) {
  const taskId = payload?.task_id ? String(payload.task_id) : "";
  const data = typeof payload?.data === "string" ? payload.data : "";
  if (!taskId || !data) return;

  const record = activePtySessions.get(taskId);
  // Optional chaining also covers a record whose `pty` is missing, which
  // previously raised a TypeError.
  if (typeof record?.pty?.write !== "function") {
    return;
  }

  try {
    record.pty.write(data);
  } catch (error) {
    // Log instead of letting a failed write escape into the event dispatcher.
    logError(`Failed to write terminal input for ${taskId}: ${error?.message || error}`);
  }
}
|
|
1327
|
+
|
|
1328
|
+
/**
 * Handles a `terminal_resize` event by forwarding the requested size to the
 * PTY session registered for the task. Does nothing when `task_id` is missing
 * or no session is registered for it.
 *
 * @param {object} payload - Event payload. Reads `task_id`, `cols`, `rows`.
 * @returns {void}
 */
function handleTerminalResize(payload) {
  const taskId = payload?.task_id ? String(payload.task_id) : "";
  const session = taskId ? activePtySessions.get(taskId) : undefined;
  if (session) {
    resizePty(session, payload?.cols, payload?.rows);
  }
}
|
|
1335
|
+
|
|
1336
|
+
/**
 * Handles a `terminal_detach` event. Currently a deliberate no-op.
 *
 * @param {object} _payload - Event payload (unused).
 * @returns {void}
 */
function handleTerminalDetach(_payload) {
  // Nothing to tear down: a PTY session stays alive with no viewers attached.
  return undefined;
}
|
|
1339
|
+
|
|
557
1340
|
function handleEvent(event) {
|
|
1341
|
+
const receivedAt = Date.now();
|
|
1342
|
+
lastInboundAt = receivedAt;
|
|
1343
|
+
markWatchdogHealthy("inbound", receivedAt);
|
|
558
1344
|
if (event.type === "error") {
|
|
559
1345
|
const payload = event?.payload && typeof event.payload === "object" ? event.payload : {};
|
|
560
1346
|
const planLimitMessage = getPlanLimitMessage(payload);
|
|
@@ -575,15 +1361,55 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
575
1361
|
handleCreateTask(event.payload);
|
|
576
1362
|
return;
|
|
577
1363
|
}
|
|
1364
|
+
if (event.type === "create_pty_task") {
|
|
1365
|
+
void handleCreatePtyTask(event.payload);
|
|
1366
|
+
return;
|
|
1367
|
+
}
|
|
578
1368
|
if (event.type === "stop_task") {
|
|
579
1369
|
handleStopTask(event.payload);
|
|
580
1370
|
return;
|
|
581
1371
|
}
|
|
1372
|
+
if (event.type === "terminal_attach") {
|
|
1373
|
+
void handleTerminalAttach(event.payload);
|
|
1374
|
+
return;
|
|
1375
|
+
}
|
|
1376
|
+
if (event.type === "terminal_input") {
|
|
1377
|
+
handleTerminalInput(event.payload);
|
|
1378
|
+
return;
|
|
1379
|
+
}
|
|
1380
|
+
if (event.type === "terminal_resize") {
|
|
1381
|
+
handleTerminalResize(event.payload);
|
|
1382
|
+
return;
|
|
1383
|
+
}
|
|
1384
|
+
if (event.type === "terminal_detach") {
|
|
1385
|
+
handleTerminalDetach(event.payload);
|
|
1386
|
+
return;
|
|
1387
|
+
}
|
|
582
1388
|
if (event.type === "collect_logs") {
|
|
583
1389
|
void handleCollectLogs(event.payload);
|
|
584
1390
|
}
|
|
585
1391
|
}
|
|
586
1392
|
|
|
1393
|
+
/**
 * Records that a health signal arrived after a watchdog self-heal.
 *
 * Does nothing unless a self-heal is awaiting confirmation (a non-empty
 * `watchdogAwaitingHealthySignalAt` and a non-zero `watchdogHealAttempts`)
 * and the signal is not older than that marker. Otherwise it logs the
 * recovery with the current health timestamps and clears both values.
 *
 * @param {string} signal - Name of the signal source, used in the log line
 *   (for example "inbound").
 * @param {number} [at=Date.now()] - Time the signal was observed, in epoch ms.
 * @returns {void}
 */
function markWatchdogHealthy(signal, at = Date.now()) {
  const awaitingSince = watchdogAwaitingHealthySignalAt;
  const healPending = Boolean(awaitingSince) && watchdogHealAttempts !== 0;
  if (!healPending || at < awaitingSince) {
    return;
  }

  const healthState = formatDaemonHealthState({
    connectedAt: lastConnectedAt,
    lastPongAt,
    lastInboundAt,
    lastSuccessfulHttpAt,
    lastPresenceConfirmedAt,
  });
  log(`[watchdog] Backend websocket healthy again after self-heal via ${signal} (${healthState})`);

  // Recovery confirmed: reset the self-heal bookkeeping.
  watchdogAwaitingHealthySignalAt = null;
  watchdogHealAttempts = 0;
}
|
|
1412
|
+
|
|
587
1413
|
async function handleCollectLogs(payload) {
|
|
588
1414
|
const requestId = payload?.request_id ? String(payload.request_id).trim() : "";
|
|
589
1415
|
const taskId = payload?.task_id ? String(payload.task_id).trim() : "";
|
|
@@ -672,8 +1498,9 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
672
1498
|
});
|
|
673
1499
|
};
|
|
674
1500
|
|
|
675
|
-
const
|
|
676
|
-
|
|
1501
|
+
const processRecord = activeTaskProcesses.get(taskId);
|
|
1502
|
+
const ptyRecord = activePtySessions.get(taskId);
|
|
1503
|
+
if ((!processRecord || !processRecord.child) && !ptyRecord) {
|
|
677
1504
|
log(`Stop requested for task ${taskId}, but no active process found`);
|
|
678
1505
|
sendStopAck(false);
|
|
679
1506
|
return;
|
|
@@ -684,36 +1511,58 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
684
1511
|
|
|
685
1512
|
sendStopAck(true);
|
|
686
1513
|
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
1514
|
+
const activeRecord = processRecord || ptyRecord;
|
|
1515
|
+
if (activeRecord?.stopForceKillTimer) {
|
|
1516
|
+
clearTimeout(activeRecord.stopForceKillTimer);
|
|
1517
|
+
activeRecord.stopForceKillTimer = null;
|
|
690
1518
|
}
|
|
691
1519
|
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
1520
|
+
if (processRecord?.child) {
|
|
1521
|
+
try {
|
|
1522
|
+
if (typeof processRecord.child.kill === "function") {
|
|
1523
|
+
processRecord.child.kill("SIGTERM");
|
|
1524
|
+
}
|
|
1525
|
+
} catch (error) {
|
|
1526
|
+
logError(`Failed to stop task ${taskId}: ${error?.message || error}`);
|
|
1527
|
+
}
|
|
1528
|
+
} else if (ptyRecord?.pty) {
|
|
1529
|
+
try {
|
|
1530
|
+
if (typeof ptyRecord.pty.kill === "function") {
|
|
1531
|
+
ptyRecord.pty.kill("SIGTERM");
|
|
1532
|
+
}
|
|
1533
|
+
} catch (error) {
|
|
1534
|
+
logError(`Failed to stop PTY task ${taskId}: ${error?.message || error}`);
|
|
695
1535
|
}
|
|
696
|
-
} catch (error) {
|
|
697
|
-
logError(`Failed to stop task ${taskId}: ${error?.message || error}`);
|
|
698
1536
|
}
|
|
699
1537
|
|
|
700
|
-
|
|
701
|
-
const
|
|
702
|
-
|
|
1538
|
+
activeRecord.stopForceKillTimer = setTimeout(() => {
|
|
1539
|
+
const latestProcess = activeTaskProcesses.get(taskId);
|
|
1540
|
+
const latestPty = activePtySessions.get(taskId);
|
|
1541
|
+
if (latestProcess?.child && processRecord?.child && latestProcess.child === processRecord.child) {
|
|
1542
|
+
try {
|
|
1543
|
+
if (typeof latestProcess.child.kill === "function") {
|
|
1544
|
+
log(`Task ${taskId} did not exit after SIGTERM, sending SIGKILL`);
|
|
1545
|
+
latestProcess.child.kill("SIGKILL");
|
|
1546
|
+
}
|
|
1547
|
+
} catch (error) {
|
|
1548
|
+
logError(`Failed to SIGKILL task ${taskId}: ${error?.message || error}`);
|
|
1549
|
+
}
|
|
703
1550
|
return;
|
|
704
1551
|
}
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
1552
|
+
if (latestPty?.pty && ptyRecord?.pty && latestPty.pty === ptyRecord.pty) {
|
|
1553
|
+
try {
|
|
1554
|
+
if (typeof latestPty.pty.kill === "function") {
|
|
1555
|
+
log(`PTY task ${taskId} did not exit after SIGTERM, sending SIGKILL`);
|
|
1556
|
+
latestPty.pty.kill("SIGKILL");
|
|
1557
|
+
}
|
|
1558
|
+
} catch (error) {
|
|
1559
|
+
logError(`Failed to SIGKILL PTY task ${taskId}: ${error?.message || error}`);
|
|
709
1560
|
}
|
|
710
|
-
} catch (error) {
|
|
711
|
-
logError(`Failed to SIGKILL task ${taskId}: ${error?.message || error}`);
|
|
712
1561
|
}
|
|
713
1562
|
}, STOP_FORCE_KILL_TIMEOUT_MS);
|
|
714
1563
|
|
|
715
|
-
if (typeof
|
|
716
|
-
|
|
1564
|
+
if (typeof activeRecord.stopForceKillTimer?.unref === "function") {
|
|
1565
|
+
activeRecord.stopForceKillTimer.unref();
|
|
717
1566
|
}
|
|
718
1567
|
}
|
|
719
1568
|
|
|
@@ -817,7 +1666,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
817
1666
|
}
|
|
818
1667
|
|
|
819
1668
|
// Validate and get CLI command for the backend
|
|
820
|
-
const effectiveBackend = backendType || SUPPORTED_BACKENDS[0];
|
|
1669
|
+
const effectiveBackend = normalizeRuntimeBackendName(backendType || SUPPORTED_BACKENDS[0]);
|
|
821
1670
|
if (!SUPPORTED_BACKENDS.includes(effectiveBackend)) {
|
|
822
1671
|
logError(`Unsupported backend: ${effectiveBackend}. Supported: ${SUPPORTED_BACKENDS.join(", ")}`);
|
|
823
1672
|
sendAgentCommandAck({
|
|
@@ -1060,7 +1909,14 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1060
1909
|
}
|
|
1061
1910
|
|
|
1062
1911
|
closePromise = (async () => {
|
|
1063
|
-
|
|
1912
|
+
daemonShuttingDown = true;
|
|
1913
|
+
if (watchdogTimer) {
|
|
1914
|
+
clearInterval(watchdogTimer);
|
|
1915
|
+
watchdogTimer = null;
|
|
1916
|
+
}
|
|
1917
|
+
const activeProcessEntries = [...activeTaskProcesses.entries()];
|
|
1918
|
+
const activePtyEntries = [...activePtySessions.entries()];
|
|
1919
|
+
const activeEntries = [...activeProcessEntries, ...activePtyEntries];
|
|
1064
1920
|
if (activeEntries.length > 0) {
|
|
1065
1921
|
log(`Shutdown requested (${reason}); stopping ${activeEntries.length} active task(s)`);
|
|
1066
1922
|
}
|
|
@@ -1088,7 +1944,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1088
1944
|
}),
|
|
1089
1945
|
);
|
|
1090
1946
|
|
|
1091
|
-
for (const [taskId, record] of
|
|
1947
|
+
for (const [taskId, record] of activeProcessEntries) {
|
|
1092
1948
|
if (record?.stopForceKillTimer) {
|
|
1093
1949
|
clearTimeout(record.stopForceKillTimer);
|
|
1094
1950
|
}
|
|
@@ -1101,7 +1957,21 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1101
1957
|
}
|
|
1102
1958
|
}
|
|
1103
1959
|
|
|
1960
|
+
for (const [taskId, record] of activePtyEntries) {
|
|
1961
|
+
if (record?.stopForceKillTimer) {
|
|
1962
|
+
clearTimeout(record.stopForceKillTimer);
|
|
1963
|
+
}
|
|
1964
|
+
try {
|
|
1965
|
+
if (typeof record.pty?.kill === "function") {
|
|
1966
|
+
record.pty.kill("SIGTERM");
|
|
1967
|
+
}
|
|
1968
|
+
} catch (error) {
|
|
1969
|
+
logError(`Failed to stop PTY task ${taskId} on daemon close: ${error?.message || error}`);
|
|
1970
|
+
}
|
|
1971
|
+
}
|
|
1972
|
+
|
|
1104
1973
|
activeTaskProcesses.clear();
|
|
1974
|
+
activePtySessions.clear();
|
|
1105
1975
|
|
|
1106
1976
|
try {
|
|
1107
1977
|
await withTimeout(
|
|
@@ -1174,6 +2044,82 @@ function parsePositiveInt(value, fallback) {
|
|
|
1174
2044
|
return fallback;
|
|
1175
2045
|
}
|
|
1176
2046
|
|
|
2047
|
+
/**
 * Renders a disconnect event as a single space-separated `key=value` string.
 *
 * `reason` is always present and falls back to "unknown". Every other field
 * is included only when the event carries a usable value: a finite number, or
 * a string that is non-empty after trimming.
 *
 * @param {object} [event] - Disconnect event. Reads `reason`, `closeCode`,
 *   `closeReason`, `socketError`, `missedPongs`, `lastPingAt`, `lastPongAt`
 *   and `lastMessageAt`.
 * @returns {string} The formatted diagnostics line.
 */
function formatDisconnectDiagnostics(event) {
  const trimmedText = (value) => (typeof value === "string" ? value.trim() : "");

  const fields = [`reason=${trimmedText(event?.reason) || "unknown"}`];

  if (Number.isFinite(event?.closeCode)) {
    fields.push(`close_code=${event.closeCode}`);
  }

  const closeReason = trimmedText(event?.closeReason);
  if (closeReason) {
    fields.push(`close_reason=${closeReason}`);
  }

  const socketError = trimmedText(event?.socketError);
  if (socketError) {
    fields.push(`socket_error=${socketError}`);
  }

  // Zero missed pongs is not reported.
  const missedPongs = event?.missedPongs;
  if (Number.isFinite(missedPongs) && missedPongs > 0) {
    fields.push(`missed_pongs=${missedPongs}`);
  }

  const timestampFields = [
    ["last_ping_at", "lastPingAt"],
    ["last_pong_at", "lastPongAt"],
    ["last_message_at", "lastMessageAt"],
  ];
  for (const [label, key] of timestampFields) {
    const at = event?.[key];
    if (Number.isFinite(at)) {
      fields.push(`${label}=${formatIsoTimestamp(at)}`);
    }
  }

  return fields.join(" ");
}
|
|
2076
|
+
|
|
2077
|
+
/**
 * Renders the daemon's health timestamps as a single space-separated
 * `key=value` string, each value formatted by `formatIsoTimestamp`.
 *
 * @param {object} state - Health snapshot. Reads `connectedAt`, `lastPongAt`,
 *   `lastInboundAt`, `lastSuccessfulHttpAt` and `lastPresenceConfirmedAt`.
 * @returns {string} The formatted health line.
 */
function formatDaemonHealthState(state) {
  const {
    connectedAt,
    lastPongAt,
    lastInboundAt,
    lastSuccessfulHttpAt,
    lastPresenceConfirmedAt,
  } = state;

  const entries = [
    ["connected_at", connectedAt],
    ["last_pong_at", lastPongAt],
    ["last_inbound_at", lastInboundAt],
    ["last_http_ok_at", lastSuccessfulHttpAt],
    ["last_presence_at", lastPresenceConfirmedAt],
  ];

  return entries.map(([label, at]) => `${label}=${formatIsoTimestamp(at)}`).join(" ");
}
|
|
2092
|
+
|
|
2093
|
+
/**
 * Renders optional watchdog diagnostics as a single space-separated
 * `key=value` string.
 *
 * Numeric fields are included only when finite, and `probeError` only when it
 * is a string that is non-empty after trimming.
 *
 * @param {object} [extra] - Diagnostics. Reads `agentCount`, `probeStatus`,
 *   `probeAt`, `probeError`, `lastWsHealthAt` and `staleForMs`.
 * @returns {string} The formatted line, or "no-extra-diagnostics" when no
 *   field qualifies.
 */
function formatWatchdogExtra(extra) {
  const fields = [];
  const asIso = (at) => formatIsoTimestamp(at);
  const addFinite = (label, value, render = String) => {
    if (Number.isFinite(value)) {
      fields.push(`${label}=${render(value)}`);
    }
  };

  addFinite("agent_count", extra?.agentCount);
  addFinite("probe_status", extra?.probeStatus);
  addFinite("probe_at", extra?.probeAt, asIso);

  const probeError = typeof extra?.probeError === "string" ? extra.probeError.trim() : "";
  if (probeError) {
    fields.push(`probe_error=${probeError}`);
  }

  addFinite("last_ws_health_at", extra?.lastWsHealthAt, asIso);
  addFinite("stale_for_ms", extra?.staleForMs);

  return fields.length > 0 ? fields.join(" ") : "no-extra-diagnostics";
}
|
|
2115
|
+
|
|
2116
|
+
/**
 * Formats an epoch-millisecond timestamp as an ISO 8601 string for log lines.
 *
 * @param {*} value - Candidate timestamp in epoch milliseconds.
 * @returns {string} The ISO 8601 string; "never" when `value` is not a finite
 *   number; "invalid" when it is finite but outside the range a Date can hold.
 */
function formatIsoTimestamp(value) {
  if (!Number.isFinite(value)) {
    return "never";
  }
  const date = new Date(value);
  // A finite but out-of-range value yields an Invalid Date, and toISOString()
  // would throw a RangeError. A log formatter must not crash its caller.
  if (Number.isNaN(date.getTime())) {
    return "invalid";
  }
  return date.toISOString();
}
|
|
2122
|
+
|
|
1177
2123
|
async function withTimeout(promise, timeoutMs, label) {
|
|
1178
2124
|
let timer = null;
|
|
1179
2125
|
const timeoutPromise = new Promise((_, reject) => {
|