@love-moon/conductor-cli 0.2.42 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/conductor-fire.js +21 -3
- package/bin/conductor-issue.js +357 -0
- package/bin/conductor-project.js +436 -0
- package/bin/conductor-task.js +285 -0
- package/bin/conductor.js +25 -1
- package/package.json +9 -4
- package/src/ai-manager-handlers.js +17 -1
- package/src/daemon.js +795 -35
- package/src/entity-helpers.js +345 -0
- package/src/fire/resume.js +113 -870
- package/src/runtime-backends.js +48 -8
package/src/daemon.js
CHANGED
|
@@ -2,7 +2,7 @@ import fs from "node:fs";
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import os from "node:os";
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
|
-
import { spawn } from "node:child_process";
|
|
5
|
+
import { spawn, spawnSync } from "node:child_process";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
|
|
8
8
|
import dotenv from "dotenv";
|
|
@@ -163,6 +163,33 @@ function getUserConfig(configFilePath) {
|
|
|
163
163
|
return {};
|
|
164
164
|
}
|
|
165
165
|
|
|
166
|
+
// Decide whether the daemon should launch each Fire process inside a detached
// tmux session. When enabled, the daemon spawns `tmux new-session -d ...` so
// the Fire process runs under the tmux server with no parent/child
// relationship to the daemon; the daemon can then be restarted or killed
// without affecting any running Fire process.
//
// Resolution order:
//   1. CONDUCTOR_FIRE_TMUX_MODE env var (trimmed, case-insensitive):
//      "1"/"true"/"on"/"yes" enable, "0"/"false"/"off"/"no" disable.
//      Empty or unrecognized values are ignored and resolution continues.
//   2. fire_tmux_mode in ~/.conductor/config.yaml (`userConfig`); only the
//      boolean `true` enables the mode.
//   3. Default: false
function getFireTmuxModeEnabled(userConfig) {
  const enablingTokens = ["1", "true", "on", "yes"];
  const disablingTokens = ["0", "false", "off", "no"];

  const envValue = process.env.CONDUCTOR_FIRE_TMUX_MODE;
  const token = typeof envValue === "string" ? envValue.trim().toLowerCase() : "";
  if (enablingTokens.includes(token)) {
    return true;
  }
  if (disablingTokens.includes(token)) {
    return false;
  }

  // No usable env override: defer to the config object, if there is one.
  const hasConfigObject = Boolean(userConfig) && typeof userConfig === "object";
  return hasConfigObject && userConfig.fire_tmux_mode === true;
}
|
|
192
|
+
|
|
166
193
|
function normalizePlanLimitType(limitType) {
|
|
167
194
|
if (typeof limitType !== "string") {
|
|
168
195
|
return null;
|
|
@@ -567,6 +594,12 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
567
594
|
let shutdownSignalHandled = false;
|
|
568
595
|
let forcedSignalExitHandled = false;
|
|
569
596
|
let processHandlersAttached = false;
|
|
597
|
+
// Mirrors the later `daemonShuttingDown` flag but is declared up front so
|
|
598
|
+
// process-level error handlers registered below can safely read it even if
|
|
599
|
+
// they fire before the main daemon state is initialized. `shutdownDaemon`
|
|
600
|
+
// flips this alongside `daemonShuttingDown` to signal that late WebSocket
|
|
601
|
+
// errors should be treated as benign.
|
|
602
|
+
let daemonShutdownInProgress = false;
|
|
570
603
|
|
|
571
604
|
const removeProcessListener = (eventName, handler) => {
|
|
572
605
|
if (typeof process.off === "function") {
|
|
@@ -644,6 +677,16 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
644
677
|
const WORKSPACE_ROOT = expandHomePath(workspaceRootValue, homeDir);
|
|
645
678
|
const CLI_PATH_VAL = config.CLI_PATH || CLI_PATH;
|
|
646
679
|
|
|
680
|
+
// When enabled, every Fire process is launched inside a detached tmux
|
|
681
|
+
// session via `tmux new-session -d`. The daemon-spawned `tmux` client exits
|
|
682
|
+
// immediately after creating the session, leaving the Fire process running
|
|
683
|
+
// under the tmux server with no parent/child relationship to the daemon.
|
|
684
|
+
// The actual runtime activation flag (FIRE_TMUX_MODE_ACTIVE) is computed
|
|
685
|
+
// below after we verify tmux is installed; if tmux is missing we log a
|
|
686
|
+
// warning and fall back to direct spawn rather than failing every
|
|
687
|
+
// create_task with ENOENT.
|
|
688
|
+
const FIRE_TMUX_MODE_ENABLED = getFireTmuxModeEnabled(userConfig);
|
|
689
|
+
|
|
647
690
|
// Get allow_cli_list from config
|
|
648
691
|
const RAW_ALLOW_CLI_LIST = getRawAllowCliList(userConfig);
|
|
649
692
|
let ALLOW_CLI_LIST = {};
|
|
@@ -710,6 +753,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
710
753
|
);
|
|
711
754
|
|
|
712
755
|
const spawnFn = deps.spawn || spawn;
|
|
756
|
+
const spawnSyncFn = deps.spawnSync || spawnSync;
|
|
713
757
|
const mkdirSyncFn = deps.mkdirSync || fs.mkdirSync;
|
|
714
758
|
const writeFileSyncFn = deps.writeFileSync || fs.writeFileSync;
|
|
715
759
|
const existsSyncFn = deps.existsSync || fs.existsSync;
|
|
@@ -731,8 +775,384 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
731
775
|
const resolveProjectSnapshotFn =
|
|
732
776
|
deps.resolveProjectSnapshot || ((projectPath) => new ProjectContext(projectPath).snapshot());
|
|
733
777
|
|
|
734
|
-
|
|
735
|
-
|
|
778
|
+
// ---- Fire tmux mode helpers ---------------------------------------------
|
|
779
|
+
// Check synchronously whether a `tmux` binary can be run from PATH by
// executing `tmux -V` (2 second cap). Used at daemon startup so that a
// misconfigured environment falls back gracefully instead of failing every
// create_task with ENOENT.
//
// Returns true only when the probe ran and exited with status 0. A thrown
// exception, a missing result, a reported `error`, or any other status
// (including null) all yield false.
function isTmuxAvailable() {
  let probeResult;
  try {
    probeResult = spawnSyncFn("tmux", ["-V"], {
      stdio: "ignore",
      timeout: 2000,
    });
  } catch {
    return false;
  }
  const ranCleanly = Boolean(probeResult) && !probeResult.error;
  return ranCleanly && probeResult.status === 0;
}
|
|
799
|
+
|
|
800
|
+
// Resolve the active tmux mode: requested via config AND tmux installed.
|
|
801
|
+
const FIRE_TMUX_MODE_ACTIVE = FIRE_TMUX_MODE_ENABLED && isTmuxAvailable();
|
|
802
|
+
if (FIRE_TMUX_MODE_ENABLED && !FIRE_TMUX_MODE_ACTIVE) {
|
|
803
|
+
logError(
|
|
804
|
+
"fire_tmux_mode is enabled but `tmux` is not available on PATH; " +
|
|
805
|
+
"falling back to direct spawn. Install tmux to launch Fire processes " +
|
|
806
|
+
"in detached tmux sessions.",
|
|
807
|
+
);
|
|
808
|
+
} else if (FIRE_TMUX_MODE_ACTIVE) {
|
|
809
|
+
log("Fire tmux mode enabled: each Fire process will run in a detached tmux session");
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
// Wrap a value in single quotes so it can be embedded as one word inside a
// `bash -c '...'` command line. `null`/`undefined` become the empty string,
// which is rendered as `''`. Each embedded single quote is replaced with the
// standard `'\''` sequence (close quote, escaped quote, reopen quote).
function shellQuoteForBash(value) {
  const ESCAPED_SINGLE_QUOTE = "'\\''";
  const text = String(value ?? "");
  return `'${text.split("'").join(ESCAPED_SINGLE_QUOTE)}'`;
}
|
|
822
|
+
|
|
823
|
+
// Build the deterministic prefix shared by every tmux session that belongs
// to a given task id: `conductor-fire-<sanitized id>-`.
//
// Tmux session names cannot contain `:` or `.`, so sanitizing is aggressive:
// every character outside [a-zA-Z0-9_-] becomes `_`, leading/trailing `_`
// runs are stripped, and the result is clamped to 32 characters. An id that
// sanitizes to nothing falls back to the literal `task`. The trailing `-`
// separates the prefix from the per-spawn uniqueness suffix added by
// `buildFireTmuxSessionName`.
//
// Used both by spawn (to construct the full name) and by orphan cleanup on
// task delete (to find every session that belongs to a deleted task — even
// ones the current daemon never tracked, e.g. left over from a previous
// daemon process).
function buildFireTmuxSessionPrefix(taskId) {
  const MAX_ID_LENGTH = 32;
  const rawId = String(taskId || "");
  const replaced = rawId.replace(/[^a-zA-Z0-9_-]/g, "_");
  const trimmed = replaced.replace(/^_+|_+$/g, "");
  const clamped = trimmed.slice(0, MAX_ID_LENGTH);
  const safeId = clamped || "task";
  return `conductor-fire-${safeId}-`;
}
|
|
838
|
+
|
|
839
|
+
// Build a unique tmux session name for one Fire spawn: the task's
// deterministic prefix followed by a per-spawn uniqueness suffix (base36
// millisecond timestamp plus up to 4 base36 random characters).
//
// The suffix means that re-spawning the same task id while a previous
// session may still be alive — for example after a daemon restart that left
// the old session running — does not collide with a `duplicate session`
// error from tmux.
function buildFireTmuxSessionName(taskId) {
  const timestampPart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 6);
  const sessionPrefix = buildFireTmuxSessionPrefix(taskId);
  return sessionPrefix + timestampPart + randomPart;
}
|
|
850
|
+
|
|
851
|
+
// Spawn the Fire CLI either directly (default) or inside a detached tmux
// session (when FIRE_TMUX_MODE_ACTIVE).
//
// Options:
//   taskId  - task id; only used to derive the tmux session name.
//   args    - CLI arguments placed after CLI_PATH_VAL.
//   env     - environment for the spawned process. In tmux mode each entry
//             is additionally forwarded to the session with `-e KEY=VALUE`.
//   cwd     - working directory. In tmux mode it is passed as `-c <cwd>`
//             only when it is a non-empty string.
//   logPath - tmux mode only: Fire's combined stdout/stderr is appended to
//             this file through `tee -a`.
//
// Returns `{ child, tmuxSession }`. In direct mode `child` is the Fire
// process itself (stdout/stderr piped to the daemon) and `tmuxSession` is
// null. In tmux mode `child` is the short-lived `tmux new-session` client
// (detached and unref'ed); once it exits with code 0 the Fire process keeps
// running under the tmux server, fully detached from the daemon, and
// `tmuxSession` is the generated session name.
function spawnFireProcess({ taskId, args, env, cwd, logPath }) {
  if (!FIRE_TMUX_MODE_ACTIVE) {
    const child = spawnFn(process.execPath, [CLI_PATH_VAL, ...args], {
      cwd,
      env,
      stdio: ["inherit", "pipe", "pipe"],
    });
    return { child, tmuxSession: null };
  }

  const sessionName = buildFireTmuxSessionName(taskId);
  const innerCommand = [process.execPath, CLI_PATH_VAL, ...args]
    .map((part) => shellQuoteForBash(part))
    .join(" ");
  // Pipe Fire's stdout/stderr through `tee -a <log>` instead of plain
  // `>> <log> 2>&1`. With redirection alone, the tmux pane's terminal
  // shows nothing (all output goes straight to the file), so attaching
  // via `tmux a -t <session>` for live observation is useless. With
  // `tee` the same bytes go to both the pane (visible to whoever
  // attaches) and the log file (preserved for offline inspection).
  const redirectedCommand = logPath
    ? `${innerCommand} 2>&1 | tee -a ${shellQuoteForBash(logPath)}`
    : innerCommand;

  // Build `-e KEY=VALUE` flags for the new session.
  //
  // Why we cannot rely on the spawn `env` to reach Fire:
  //   `tmux new-session` is dispatched through the user's tmux server
  //   process. If a tmux server is already running (which is the common
  //   case on dev machines that use tmux for other work) the new session
  //   inherits that server's process environment — NOT the env we pass
  //   to the freshly spawned `tmux` client. The `update-environment`
  //   server option only forwards a small allowlist (DISPLAY,
  //   SSH_AUTH_SOCK, …), none of which are CONDUCTOR_*.
  //
  //   Without this Fire would start with no CONDUCTOR_TASK_ID /
  //   CONDUCTOR_AGENT_TOKEN / CONDUCTOR_BACKEND_URL, its
  //   ConductorClient.connect would either fail authentication or send
  //   `session_started` to the wrong task id, and the frontend would
  //   never see the "<backend> session started: <uuid>" line.
  //
  //   The `-e` flag attaches the variable to *this* session's environment,
  //   bypassing the server's stale environment entirely.
  //
  // NOTE(review): every value, tokens included, becomes part of the tmux
  // client's argv for as long as that client runs — confirm this exposure
  // is acceptable on shared hosts.
  const tmuxEnvFlags = [];
  for (const [key, value] of Object.entries(env || {})) {
    if (value === undefined || value === null) continue;
    // A key that is empty or contains `=` cannot be expressed as
    // `KEY=VALUE`: the split would land in the wrong place and define a
    // different variable than intended.
    if (key === "" || /[=\u0000\r\n]/.test(key)) continue;
    const stringValue = String(value);
    // Skip values containing NUL / newline / carriage-return — tmux's
    // environment storage does not handle them and they would otherwise
    // corrupt the session env or fail the new-session call. Spaces are
    // fine because each `-e KEY=VALUE` is a single argv element (e.g.
    // CONDUCTOR_CLI_COMMAND="codex --dangerously-bypass-...").
    if (/[\u0000\r\n]/.test(stringValue)) continue;
    tmuxEnvFlags.push("-e", `${key}=${stringValue}`);
  }

  // Every argv element handed to spawn must be a string. Direct mode accepts
  // an undefined `cwd`, so tmux mode must not turn the same input into a
  // thrown TypeError: omit `-c` unless there is a real directory to pass.
  // NOTE(review): without `-c` the session presumably starts in the tmux
  // client's own working directory — confirm against the tmux manual.
  const startDirectoryFlags = typeof cwd === "string" && cwd !== "" ? ["-c", cwd] : [];

  // Use a non-login `bash -c` here: node and the CLI script are passed by
  // absolute path, so we don't need PATH from a login shell, and `-l`
  // would slow startup and could leak unexpected stderr from user shell
  // init scripts.
  const tmuxArgs = [
    "new-session",
    "-d",
    ...tmuxEnvFlags,
    "-s",
    sessionName,
    ...startDirectoryFlags,
    "bash",
    "-c",
    `exec ${redirectedCommand}`,
  ];
  log(`Spawning Fire via tmux: session=${sessionName} cwd=${cwd}`);
  const child = spawnFn("tmux", tmuxArgs, {
    cwd,
    env,
    stdio: ["ignore", "pipe", "pipe"],
    detached: true,
  });
  if (typeof child.unref === "function") {
    child.unref();
  }
  return { child, tmuxSession: sessionName };
}
|
|
939
|
+
|
|
940
|
+
// Normalize the configured probe timeout. Non-numeric, non-finite, zero and
// negative inputs yield the 5000 ms default. Positive values are capped at
// the largest delay Node timers support: a larger delay is silently replaced
// with 1 ms by `setTimeout`, which would make every probe "time out"
// immediately and report live sessions as dead.
function resolveTmuxProbeTimeoutMs(rawValue) {
  const DEFAULT_TIMEOUT_MS = 5000;
  const MAX_TIMER_DELAY_MS = 2147483647; // 2^31 - 1
  const explicit = Number(rawValue);
  if (Number.isFinite(explicit) && explicit > 0) {
    return Math.min(explicit, MAX_TIMER_DELAY_MS);
  }
  return DEFAULT_TIMEOUT_MS;
}

// Hard timeout for tmux probes, overridable via
// `config.TMUX_PROBE_TIMEOUT_MS` (mostly for tests).
//
// The hard timeout matters because a wedged tmux server (bad socket perms,
// hung server process) would otherwise leave probes pending forever and,
// combined with the periodic reaper, leak children + Promises.
const TMUX_PROBE_TIMEOUT_MS = resolveTmuxProbeTimeoutMs(config.TMUX_PROBE_TIMEOUT_MS);

// Async probe: does the named tmux session still exist? Resolves to a
// boolean — `true` iff `tmux has-session -t <name>` exits with code 0.
// An empty name, any spawn error, non-zero exit, or timeout resolves to
// `false`. The returned promise never rejects.
function tmuxSessionExists(sessionName) {
  return new Promise((resolve) => {
    if (!sessionName) {
      resolve(false);
      return;
    }
    let settled = false;
    let timer = null;
    // Resolve exactly once and make sure the timeout timer is released.
    const settle = (alive) => {
      if (settled) return;
      settled = true;
      if (timer) {
        clearTimeout(timer);
        timer = null;
      }
      resolve(alive);
    };
    try {
      const probe = spawnFn("tmux", ["has-session", "-t", sessionName], {
        stdio: "ignore",
      });
      probe.on("exit", (code) => settle(code === 0));
      probe.on("error", () => settle(false));
      // A spawn implementation that reports its result synchronously has
      // already settled; arming the timer now would only leave a stray
      // timer that later kills a finished probe and logs a bogus timeout.
      if (settled) return;
      timer = setTimeout(() => {
        // Probe took too long — assume the session state is unknown,
        // treat as "not alive" (so the reaper falls back to safe cleanup),
        // and best-effort kill the stuck child so it doesn't pile up.
        try {
          if (probe && typeof probe.kill === "function") {
            probe.kill("SIGKILL");
          }
        } catch {
          // ignore; the probe result is being discarded anyway
        }
        logError(
          `tmux has-session probe timed out after ${TMUX_PROBE_TIMEOUT_MS}ms for session ${sessionName}`,
        );
        settle(false);
      }, TMUX_PROBE_TIMEOUT_MS);
      // Never let a pending probe keep the daemon process alive.
      if (typeof timer.unref === "function") {
        timer.unref();
      }
    } catch {
      settle(false);
    }
  });
}
|
|
1004
|
+
|
|
1005
|
+
// Run one liveness sweep over `activeTaskProcesses`: probe every tmux-mode
// record that has a session name and drop the ones whose tmux session no
// longer exists. We don't (and can't reliably) observe the inner Fire
// process exit when we never owned it as a child, so this best-effort sweep
// is what keeps the active map from leaking across long daemon lifetimes.
//
// Sessions are probed one at a time, in map order. A record is removed only
// if the map still holds that exact record after the probe, and any pending
// `stopForceKillTimer` on it is cancelled first.
//
// Theoretical startup race (intentionally not guarded against):
//   `spawnFireProcess` returns synchronously, and we set the active
//   record before the spawned `tmux new-session -d` client has actually
//   exited. There is therefore a microsecond-scale window in which an
//   active record exists but `tmux has-session` may return false because
//   the session has not finished registering with the tmux server. With
//   the default 30s poll interval the chance of hitting this window is
//   ~negligible, and even when hit Fire's own websocket connection
//   subsequently overwrites any stale terminal status the daemon
//   reports. If this ever shows up in production, add a `createdAt`
//   timestamp to the record and a grace period here.
async function reapDeadTmuxSessionsOnce() {
  // Snapshot first so deletions below never disturb the iteration.
  const tmuxEntries = [...activeTaskProcesses.entries()].filter(
    ([, entry]) => entry?.tmuxMode && entry.tmuxSession,
  );

  for (const [taskId, entry] of tmuxEntries) {
    const sessionAlive = await tmuxSessionExists(entry.tmuxSession);
    if (sessionAlive) {
      continue;
    }
    // A concurrent restart_task may have replaced the record while we were
    // probing; only the record we actually probed may be removed.
    if (activeTaskProcesses.get(taskId) !== entry) {
      continue;
    }
    log(
      `Tmux session ${entry.tmuxSession} for task ${taskId} no longer exists; cleaning up activeTaskProcesses entry`,
    );
    if (entry.stopForceKillTimer) {
      clearTimeout(entry.stopForceKillTimer);
      entry.stopForceKillTimer = null;
    }
    activeTaskProcesses.delete(taskId);
  }
}
|
|
1045
|
+
|
|
1046
|
+
// Best-effort: terminate a Fire process running in tmux by asking tmux to
// kill its session (`tmux kill-session -t <name>`). The kill client is
// spawned detached and unref'ed and is not awaited.
//
// Returns true if the kill command was issued, false when no session name
// was given or the spawn threw. An asynchronous spawn `error` is only
// logged; it does not change the value already returned.
function killFireTmuxSession(sessionName) {
  if (!sessionName) {
    return false;
  }
  try {
    const killer = spawnFn("tmux", ["kill-session", "-t", sessionName], {
      stdio: "ignore",
      detached: true,
    });
    if (typeof killer.unref === "function") {
      killer.unref();
    }
    killer.on("error", (err) => {
      logError(`Failed to issue tmux kill-session for ${sessionName}: ${err?.message || err}`);
    });
  } catch (error) {
    logError(`Failed to spawn tmux kill-session for ${sessionName}: ${error?.message || error}`);
    return false;
  }
  return true;
}
|
|
1069
|
+
|
|
1070
|
+
// List every tmux session name visible to the current user by running
// `tmux list-sessions -F "#{session_name}"`. Resolves to an array of
// trimmed, non-empty names. Resolves to `[]` on any spawn error, non-zero
// exit, or timeout (e.g. tmux not available or the server not running).
// Subject to TMUX_PROBE_TIMEOUT_MS. The returned promise never rejects.
//
// Output is parsed only once stdout has been fully read: a child's `exit`
// event can fire while its stdout pipe still holds unread data, so parsing
// on `exit` alone could silently drop session names. `close` is emitted
// after the stdio streams have closed and is therefore the authoritative
// end-of-output signal.
function listAllTmuxSessions() {
  return new Promise((resolve) => {
    let settled = false;
    let timer = null;
    const chunks = [];
    // Resolve exactly once and make sure the timeout timer is released.
    const settle = (lines) => {
      if (settled) return;
      settled = true;
      if (timer) {
        clearTimeout(timer);
        timer = null;
      }
      resolve(lines);
    };
    // Decode once over the whole output so a multi-byte character that was
    // split across two chunks is not corrupted.
    const parseSessionNames = () =>
      Buffer.concat(chunks)
        .toString("utf8")
        .split("\n")
        .map((s) => s.trim())
        .filter(Boolean);
    // Non-zero exit usually means "no server running" or "no sessions" —
    // both fine, just return empty.
    const finish = (code) => settle(code === 0 ? parseSessionNames() : []);
    try {
      const probe = spawnFn("tmux", ["list-sessions", "-F", "#{session_name}"], {
        stdio: ["ignore", "pipe", "ignore"],
      });
      const out = probe.stdout;
      if (out && typeof out.on === "function") {
        out.on("data", (chunk) => {
          const isBinary = Buffer.isBuffer(chunk) || chunk instanceof Uint8Array;
          chunks.push(isBinary ? chunk : Buffer.from(String(chunk), "utf8"));
        });
      }
      probe.on("close", (code) => finish(code));
      probe.on("exit", (code) => {
        // A real stdout stream that has not ended yet still has data in
        // flight; wait for `close`. Spawn implementations whose stdout does
        // not expose `readableEnded` keep the settle-on-exit behavior.
        if (code === 0 && out && out.readableEnded === false) {
          return;
        }
        finish(code);
      });
      probe.on("error", () => settle([]));
      if (settled) return;
      timer = setTimeout(() => {
        try {
          if (typeof probe.kill === "function") {
            probe.kill("SIGKILL");
          }
        } catch {
          // ignore; the probe result is being discarded anyway
        }
        logError(`tmux list-sessions probe timed out after ${TMUX_PROBE_TIMEOUT_MS}ms`);
        settle([]);
      }, TMUX_PROBE_TIMEOUT_MS);
      // Never let a pending probe keep the daemon process alive.
      if (typeof timer.unref === "function") {
        timer.unref();
      }
    } catch {
      settle([]);
    }
  });
}
|
|
1130
|
+
|
|
1131
|
+
// Belt-and-suspenders cleanup for `delete_task`: when the frontend deletes
// a task, the daemon's in-memory `activeTaskProcesses` map may not contain
// its record (e.g. daemon was restarted between spawn and delete, or the
// liveness reaper had already removed it). The tmux session itself can
// still be alive in those cases, leaking a Fire process. This walks
// `tmux list-sessions` and kills every session that belongs to the task.
//
// A session belongs to the task when its name is the deterministic prefix
// derived from the task id followed by nothing but the per-spawn uniqueness
// suffix, which `buildFireTmuxSessionName` builds from base36 digits only.
// A bare prefix match is not enough: the prefix for task `abc`
// (`conductor-fire-abc-`) is also the start of every session of task
// `abc-def`, and deleting the former must not kill the latter.
//
// NOTE(review): ids are clamped to 32 characters when the prefix is built,
// so two ids sharing their first 32 sanitized characters remain
// indistinguishable here.
//
// Returns the number of sessions for which a kill command was issued;
// always 0 when tmux mode is inactive or `taskId` is empty.
async function killTmuxSessionsForDeletedTask(taskId) {
  if (!FIRE_TMUX_MODE_ACTIVE || !taskId) return 0;
  const UNIQUENESS_SUFFIX = /^[0-9a-z]+$/;
  const prefix = buildFireTmuxSessionPrefix(taskId);
  const sessions = await listAllTmuxSessions();
  let killed = 0;
  for (const name of sessions) {
    if (!name.startsWith(prefix)) continue;
    if (!UNIQUENESS_SUFFIX.test(name.slice(prefix.length))) continue;
    log(`Killing orphaned tmux session ${name} for deleted task ${taskId}`);
    // Count only kills that were actually issued.
    if (killFireTmuxSession(name)) {
      killed += 1;
    }
  }
  return killed;
}
|
|
1152
|
+
// -------------------------------------------------------------------------
|
|
1153
|
+
|
|
1154
|
+
// Compute the directory that holds a task's git worktree:
// `<projectWorkspacePath>/.conductor/worktrees/<sanitized branch>`.
// The branch name is flattened into a single path segment: every `/` and
// `\` becomes `_`, then every `..` pair becomes `_`.
function buildTaskWorktreeRoot(projectWorkspacePath, worktreeBranch) {
  const flattened = String(worktreeBranch).split(/[/\\]/).join("_");
  const branchDirName = flattened.split("..").join("_");
  return path.join(projectWorkspacePath, ".conductor", "worktrees", branchDirName);
}
|
|
738
1158
|
|
|
@@ -783,6 +1203,10 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
783
1203
|
return resolvedPath;
|
|
784
1204
|
}
|
|
785
1205
|
|
|
1206
|
+
// Convert a platform-relative path into the forward-slash form used for git
// pathspecs by replacing every `path.sep` with `/`. Falsy input yields "".
function normalizeGitPathspec(relativePath) {
  const segments = String(relativePath || "").split(path.sep);
  return segments.join("/");
}
|
|
1209
|
+
|
|
786
1210
|
const PROJECT_SETTINGS_TEMPLATE = [
|
|
787
1211
|
"worktree:",
|
|
788
1212
|
" sync_branch: false",
|
|
@@ -843,7 +1267,30 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
843
1267
|
};
|
|
844
1268
|
}
|
|
845
1269
|
|
|
846
|
-
function
|
|
1270
|
+
// Report whether `sourcePath` is tracked by git in the repository rooted at
// `projectRepoRoot`, by checking that `git ls-files -- <relative path>`
// prints at least one entry.
//
// Resolves to false — without running git — when `sourcePath` is the
// repository root itself or lies outside it. Resolves to false as well when
// the git invocation fails for any reason. Never rejects.
async function isGitTrackedWorktreePath({ projectRepoRoot, sourcePath }) {
  const relativeToRepo = path.relative(projectRepoRoot, sourcePath);
  // Only the `..` path *segment* means "outside the repository". A plain
  // `startsWith("..")` would also reject in-repo names that merely begin
  // with two dots (e.g. `..cache/data.json`) and report them as untracked.
  const escapesRepo =
    relativeToRepo === ".." ||
    relativeToRepo.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativeToRepo);
  if (!relativeToRepo || relativeToRepo === "." || escapesRepo) {
    return false;
  }

  try {
    const { stdout } = await runSpawnProcess(
      "git",
      ["-C", projectRepoRoot, "ls-files", "--", normalizeGitPathspec(relativeToRepo)],
      { cwd: projectRepoRoot, timeoutMs: WORKTREE_SYNC_TIMEOUT_MS },
    );
    return stdout.trim().length > 0;
  } catch {
    // Best-effort probe: a git failure is treated as "not tracked".
    return false;
  }
}
|
|
1292
|
+
|
|
1293
|
+
async function ensureTaskWorktreeSymlinks({ projectRepoRoot, projectWorkspacePath, finalCwd }) {
|
|
847
1294
|
const { symlinkPaths } = readProjectWorktreeSettings(projectWorkspacePath);
|
|
848
1295
|
for (const configuredPath of symlinkPaths) {
|
|
849
1296
|
const sourcePath = resolveProjectScopedPath(
|
|
@@ -851,6 +1298,11 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
851
1298
|
configuredPath,
|
|
852
1299
|
`worktree.symlink entry ${configuredPath}`,
|
|
853
1300
|
);
|
|
1301
|
+
// Git-tracked files and directories should come from the checked-out
|
|
1302
|
+
// worktree itself rather than being overwritten by project-local symlinks.
|
|
1303
|
+
if (await isGitTrackedWorktreePath({ projectRepoRoot, sourcePath })) {
|
|
1304
|
+
continue;
|
|
1305
|
+
}
|
|
854
1306
|
const linkPath = resolveProjectScopedPath(
|
|
855
1307
|
finalCwd,
|
|
856
1308
|
configuredPath,
|
|
@@ -977,7 +1429,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
977
1429
|
|
|
978
1430
|
const worktreeRoot = buildTaskWorktreeRoot(
|
|
979
1431
|
worktreeConfig.projectWorkspacePath,
|
|
980
|
-
worktreeConfig.
|
|
1432
|
+
worktreeConfig.worktreeBranch,
|
|
981
1433
|
);
|
|
982
1434
|
const finalCwd = resolveTaskWorktreeCwd(worktreeRoot, worktreeConfig.projectRelativePath);
|
|
983
1435
|
const gitMarkerPath = path.join(worktreeRoot, ".git");
|
|
@@ -1069,7 +1521,8 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1069
1521
|
}
|
|
1070
1522
|
|
|
1071
1523
|
mkdirSyncFn(finalCwd, { recursive: true });
|
|
1072
|
-
ensureTaskWorktreeSymlinks({
|
|
1524
|
+
await ensureTaskWorktreeSymlinks({
|
|
1525
|
+
projectRepoRoot: worktreeConfig.projectRepoRoot,
|
|
1073
1526
|
projectWorkspacePath: worktreeConfig.projectWorkspacePath,
|
|
1074
1527
|
finalCwd,
|
|
1075
1528
|
});
|
|
@@ -1339,6 +1792,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1339
1792
|
return;
|
|
1340
1793
|
}
|
|
1341
1794
|
shutdownSignalHandled = true;
|
|
1795
|
+
daemonShutdownInProgress = true;
|
|
1342
1796
|
void (async () => {
|
|
1343
1797
|
try {
|
|
1344
1798
|
log(`Received ${signal}, shutting down...`);
|
|
@@ -1357,11 +1811,37 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1357
1811
|
const onSigTerm = () => {
|
|
1358
1812
|
handleSignal("SIGTERM");
|
|
1359
1813
|
};
|
|
1814
|
+
// Classify an error/rejection reason as a harmless late WebSocket failure.
// Returns true when its message (or, for non-Error values, its string form)
// contains one of the known "socket already gone" phrases; falsy input is
// never benign.
//
// These errors are expected when the WebSocket is torn down while a
// previously queued send is still in flight (e.g., during restart_daemon).
// Silencing them prevents a benign late rejection from aborting the
// in-progress restart/respawn flow.
const isBenignShutdownError = (err) => {
  if (!err) {
    return false;
  }
  const benignFragments = [
    "WebSocket not connected",
    "WebSocket is not open",
    "WebSocket is closed",
  ];
  const text = (err instanceof Error ? err.message : String(err)) || "";
  return benignFragments.some((fragment) => text.includes(fragment));
};
|
|
1360
1827
|
// Process-level `uncaughtException` handler. While a shutdown is in progress
// a benign late WebSocket error is only logged and otherwise ignored. Any
// other error is logged, the daemon lock is released, and the process is
// asked to exit with code 1.
const onUncaughtException = (err) => {
  const ignorable = daemonShutdownInProgress && isBenignShutdownError(err);
  if (ignorable) {
    logError(`Ignored benign error during shutdown: ${err?.message || err}`);
    return;
  }
  logError(`Uncaught exception: ${err}`);
  cleanupLock();
  exitFn(1);
};
|
|
1836
|
+
// Process-level `unhandledRejection` handler. While a shutdown is in
// progress a benign late WebSocket rejection is only logged. Everything else
// is routed through `onUncaughtException` — non-Error reasons are wrapped in
// an Error first — so an unexpected rejection takes the same single,
// predictable exit path as an uncaught exception.
const onUnhandledRejection = (reason) => {
  if (!daemonShutdownInProgress || !isBenignShutdownError(reason)) {
    const asError = reason instanceof Error ? reason : new Error(String(reason));
    onUncaughtException(asError);
    return;
  }
  logError(`Ignored benign rejection during shutdown: ${reason?.message || reason}`);
};
|
|
1365
1845
|
const detachProcessHandlers = () => {
|
|
1366
1846
|
if (!processHandlersAttached) {
|
|
1367
1847
|
return;
|
|
@@ -1371,12 +1851,14 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1371
1851
|
removeProcessListener("SIGINT", onSigInt);
|
|
1372
1852
|
removeProcessListener("SIGTERM", onSigTerm);
|
|
1373
1853
|
removeProcessListener("uncaughtException", onUncaughtException);
|
|
1854
|
+
removeProcessListener("unhandledRejection", onUnhandledRejection);
|
|
1374
1855
|
};
|
|
1375
1856
|
|
|
1376
1857
|
process.on("exit", cleanupLock);
|
|
1377
1858
|
process.on("SIGINT", onSigInt);
|
|
1378
1859
|
process.on("SIGTERM", onSigTerm);
|
|
1379
1860
|
process.on("uncaughtException", onUncaughtException);
|
|
1861
|
+
process.on("unhandledRejection", onUnhandledRejection);
|
|
1380
1862
|
processHandlersAttached = true;
|
|
1381
1863
|
|
|
1382
1864
|
if (config.CLEAN_ALL) {
|
|
@@ -1426,6 +1908,23 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1426
1908
|
let watchdogAwaitingHealthySignalAt = null;
|
|
1427
1909
|
let watchdogTimer = null;
|
|
1428
1910
|
|
|
1911
|
+
// Tmux liveness reaper — periodically removes activeTaskProcesses entries
// whose underlying tmux session no longer exists. Only used when
// FIRE_TMUX_MODE_ACTIVE.
//
// Set `config.TMUX_LIVENESS_POLL_MS` to 0 to disable polling entirely
// (escape hatch — the in-memory map will then accumulate stale entries
// until the daemon restarts). Unset (`undefined`/`null`/blank string),
// negative, or non-numeric values fall back to the 30s default.
let tmuxLivenessTimer = null;

// Normalize the configured poll interval (see the rules above).
//
// Two coercion traps are handled explicitly:
//   - `Number(null)` and `Number("")` are 0, which would silently disable
//     the reaper for a setting that is merely unset; those count as unset.
//   - Node timers replace any delay above 2^31 - 1 ms with 1 ms, so a "huge"
//     interval would turn into a busy loop; such values are capped instead.
function resolveTmuxLivenessPollMs(rawValue) {
  const DEFAULT_POLL_MS = 30 * 1000;
  const MAX_TIMER_DELAY_MS = 2147483647; // 2^31 - 1
  const isBlankString = typeof rawValue === "string" && rawValue.trim() === "";
  if (rawValue == null || isBlankString) {
    return DEFAULT_POLL_MS;
  }
  const explicit = Number(rawValue);
  if (Number.isFinite(explicit) && explicit >= 0) {
    return Math.min(explicit, MAX_TIMER_DELAY_MS);
  }
  return DEFAULT_POLL_MS;
}

const TMUX_LIVENESS_POLL_MS = resolveTmuxLivenessPollMs(config.TMUX_LIVENESS_POLL_MS);
|
|
1927
|
+
|
|
1429
1928
|
// --- Auto-update state ---
|
|
1430
1929
|
const VERSION_CHECK_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6 hours
|
|
1431
1930
|
let lastVersionCheckAt = 0;
|
|
@@ -1603,6 +2102,30 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1603
2102
|
watchdogTimer.unref();
|
|
1604
2103
|
}
|
|
1605
2104
|
void runMaintenanceTick();
|
|
2105
|
+
|
|
2106
|
+
if (FIRE_TMUX_MODE_ACTIVE && TMUX_LIVENESS_POLL_MS > 0) {
|
|
2107
|
+
// Reentrancy guard: setInterval fires at fixed wall-clock intervals
|
|
2108
|
+
// regardless of whether the previous reaper run finished. With many
|
|
2109
|
+
// active tmux tasks (or a slow tmux server), a single sweep can take
|
|
2110
|
+
// longer than the poll interval — without this flag we would fan
|
|
2111
|
+
// out concurrent sweeps that all probe the same sessions and pile
|
|
2112
|
+
// up child processes.
|
|
2113
|
+
let reapInFlight = false;
|
|
2114
|
+
tmuxLivenessTimer = setInterval(() => {
|
|
2115
|
+
if (reapInFlight) return;
|
|
2116
|
+
reapInFlight = true;
|
|
2117
|
+
reapDeadTmuxSessionsOnce()
|
|
2118
|
+
.catch((err) => {
|
|
2119
|
+
logError(`Tmux liveness reaper error: ${err?.message || err}`);
|
|
2120
|
+
})
|
|
2121
|
+
.finally(() => {
|
|
2122
|
+
reapInFlight = false;
|
|
2123
|
+
});
|
|
2124
|
+
}, TMUX_LIVENESS_POLL_MS);
|
|
2125
|
+
if (typeof tmuxLivenessTimer?.unref === "function") {
|
|
2126
|
+
tmuxLivenessTimer.unref();
|
|
2127
|
+
}
|
|
2128
|
+
}
|
|
1606
2129
|
})();
|
|
1607
2130
|
|
|
1608
2131
|
function markBackendHttpSuccess(at = Date.now()) {
|
|
@@ -1673,6 +2196,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1673
2196
|
);
|
|
1674
2197
|
if (watchdogHealAttempts > DAEMON_WATCHDOG_MAX_SELF_HEALS) {
|
|
1675
2198
|
daemonShuttingDown = true;
|
|
2199
|
+
daemonShutdownInProgress = true;
|
|
1676
2200
|
logError("[watchdog] Self-heal budget exceeded; exiting daemon for supervisor restart");
|
|
1677
2201
|
void requestShutdown("watchdog self-heal budget exceeded")
|
|
1678
2202
|
.catch((error) => {
|
|
@@ -2163,6 +2687,10 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
2163
2687
|
const staleTasks = tasks.filter((task) => {
|
|
2164
2688
|
const status = String(task?.status || "").trim().toLowerCase();
|
|
2165
2689
|
const agentHost = String(task?.agent_host || "").trim();
|
|
2690
|
+
// Skip init tasks: they may be waiting for a restart_task outbox
|
|
2691
|
+
// delivery (e.g. branch/fork creates a successor with status "init"
|
|
2692
|
+
// that the daemon hasn't started processing yet).
|
|
2693
|
+
if (status === "init") return false;
|
|
2166
2694
|
return agentHost === AGENT_NAME && (status === "unknown" || status === "running");
|
|
2167
2695
|
});
|
|
2168
2696
|
|
|
@@ -2197,6 +2725,10 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
2197
2725
|
}
|
|
2198
2726
|
}
|
|
2199
2727
|
|
|
2728
|
+
// Grace period: tasks created within this window are excluded from
|
|
2729
|
+
// reconcile to avoid racing with restart_task outbox delivery.
|
|
2730
|
+
const RECONCILE_GRACE_PERIOD_MS = 60_000;
|
|
2731
|
+
|
|
2200
2732
|
async function reconcileAssignedTasks() {
|
|
2201
2733
|
try {
|
|
2202
2734
|
const response = await fetchFn(`${BACKEND_HTTP}/api/tasks`, {
|
|
@@ -2219,6 +2751,16 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
2219
2751
|
const assigned = tasks.filter((task) => {
|
|
2220
2752
|
const agentHost = String(task?.agent_host || "").trim();
|
|
2221
2753
|
const status = String(task?.status || "").trim().toLowerCase();
|
|
2754
|
+
// Skip init tasks: they may be waiting for a restart_task outbox
|
|
2755
|
+
// delivery (e.g. branch/fork creates a successor with status "init"
|
|
2756
|
+
// that the daemon hasn't started processing yet).
|
|
2757
|
+
if (status === "init") return false;
|
|
2758
|
+
// Skip recently-created tasks to avoid racing with restart_task
|
|
2759
|
+
// delivery: a successor task may have been promoted to "running"
|
|
2760
|
+
// via shouldPromoteInitTask but its conductor-fire process is
|
|
2761
|
+
// still being spawned.
|
|
2762
|
+
const createdAtMs = task?.created_at ? new Date(task.created_at).getTime() : 0;
|
|
2763
|
+
if (createdAtMs && Date.now() - createdAtMs < RECONCILE_GRACE_PERIOD_MS) return false;
|
|
2222
2764
|
return agentHost === AGENT_NAME && (status === "unknown" || status === "running");
|
|
2223
2765
|
});
|
|
2224
2766
|
|
|
@@ -3150,6 +3692,15 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
3150
3692
|
logError(`Failed to report agent_command_ack(cleanup_task_worktree) for ${taskId}: ${error?.message || error}`);
|
|
3151
3693
|
});
|
|
3152
3694
|
|
|
3695
|
+
// cleanup_task_worktree is queued as part of the frontend's task delete
|
|
3696
|
+
// flow. Always sweep any tmux sessions for this task id, including
|
|
3697
|
+
// orphaned sessions that survived a daemon restart and so were not
|
|
3698
|
+
// tracked by `stopActiveTaskProcess` below. Fire-and-forget so we don't
|
|
3699
|
+
// block the worktree cleanup itself.
|
|
3700
|
+
killTmuxSessionsForDeletedTask(taskId).catch((error) => {
|
|
3701
|
+
logError(`Orphan tmux cleanup failed for task ${taskId}: ${error?.message || error}`);
|
|
3702
|
+
});
|
|
3703
|
+
|
|
3153
3704
|
if (activeTaskProcesses.has(taskId) || activePtySessions.has(taskId)) {
|
|
3154
3705
|
if (forceCleanup) {
|
|
3155
3706
|
const stopStarted = stopActiveTaskProcess(taskId, {
|
|
@@ -3189,7 +3740,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
3189
3740
|
|
|
3190
3741
|
const worktreeRoot = buildTaskWorktreeRoot(
|
|
3191
3742
|
worktreeConfig.projectWorkspacePath,
|
|
3192
|
-
worktreeConfig.
|
|
3743
|
+
worktreeConfig.worktreeBranch,
|
|
3193
3744
|
);
|
|
3194
3745
|
if (!isSafeTaskWorktreeRoot(worktreeConfig.projectWorkspacePath, worktreeRoot)) {
|
|
3195
3746
|
await reportTaskWorktreeCleanupResult({
|
|
@@ -3712,6 +4263,10 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
3712
4263
|
typeof snapshot?.lastCommit === "string" && snapshot.lastCommit.trim()
|
|
3713
4264
|
? snapshot.lastCommit.trim()
|
|
3714
4265
|
: null,
|
|
4266
|
+
gitRemoteUrl:
|
|
4267
|
+
typeof snapshot?.gitRemoteUrl === "string" && snapshot.gitRemoteUrl.trim()
|
|
4268
|
+
? snapshot.gitRemoteUrl.trim()
|
|
4269
|
+
: null,
|
|
3715
4270
|
fileCount:
|
|
3716
4271
|
typeof snapshot?.fileCount === "number" && Number.isInteger(snapshot.fileCount)
|
|
3717
4272
|
? snapshot.fileCount
|
|
@@ -3736,6 +4291,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
3736
4291
|
repo_root: result.repoRoot,
|
|
3737
4292
|
worktree_branch: result.worktreeBranch,
|
|
3738
4293
|
last_commit: result.lastCommit,
|
|
4294
|
+
git_remote_url: result.gitRemoteUrl,
|
|
3739
4295
|
file_count: result.fileCount,
|
|
3740
4296
|
error: result.error,
|
|
3741
4297
|
error_code: result.errorCode,
|
|
@@ -3756,7 +4312,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
3756
4312
|
) {
|
|
3757
4313
|
const processRecord = activeTaskProcesses.get(taskId);
|
|
3758
4314
|
const ptyRecord = activePtySessions.get(taskId);
|
|
3759
|
-
if ((!processRecord || !processRecord.child) && !ptyRecord) {
|
|
4315
|
+
if ((!processRecord || (!processRecord.child && !processRecord.tmuxSession)) && !ptyRecord) {
|
|
3760
4316
|
return false;
|
|
3761
4317
|
}
|
|
3762
4318
|
|
|
@@ -3776,6 +4332,46 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
3776
4332
|
cleanupPtyRtcTransport(taskId);
|
|
3777
4333
|
}
|
|
3778
4334
|
|
|
4335
|
+
if (processRecord?.tmuxMode) {
|
|
4336
|
+
// Tmux-managed Fire processes are not direct children. The tmux client
|
|
4337
|
+
// we spawned has already exited; the live Fire process is owned by the
|
|
4338
|
+
// tmux server. Terminate it by killing the session and immediately
|
|
4339
|
+
// remove it from the active map (no force-kill timer needed because
|
|
4340
|
+
// tmux kills the session synchronously).
|
|
4341
|
+
const sessionName = processRecord.tmuxSession;
|
|
4342
|
+
log(`Killing tmux session ${sessionName || "(unknown)"} for task ${taskId}`);
|
|
4343
|
+
killFireTmuxSession(sessionName);
|
|
4344
|
+
|
|
4345
|
+
// Report KILLED to the backend ourselves. Normally a `managedByFireBridge`
|
|
4346
|
+
// task relies on Fire to publish its own terminal status before exiting,
|
|
4347
|
+
// but `tmux kill-session` cascades as SIGHUP → bash → Fire and Fire
|
|
4348
|
+
// typically does not get a chance to flush a final websocket message
|
|
4349
|
+
// before being killed. Without this explicit report the task would stay
|
|
4350
|
+
// RUNNING in the frontend until some external timeout. Skip when the
|
|
4351
|
+
// caller explicitly suppressed status reporting (e.g. refresh_session
|
|
4352
|
+
// _inplace, which will re-spawn and overwrite status anyway).
|
|
4353
|
+
if (!suppressExitStatusReport && processRecord.projectId) {
|
|
4354
|
+
client
|
|
4355
|
+
.sendJson({
|
|
4356
|
+
type: "task_status_update",
|
|
4357
|
+
payload: {
|
|
4358
|
+
task_id: taskId,
|
|
4359
|
+
project_id: processRecord.projectId,
|
|
4360
|
+
status: "KILLED",
|
|
4361
|
+
summary: reason ? `stopped (${reason})` : "stopped via tmux kill-session",
|
|
4362
|
+
},
|
|
4363
|
+
})
|
|
4364
|
+
.catch((err) => {
|
|
4365
|
+
logError(
|
|
4366
|
+
`Failed to report task_status_update(KILLED) for tmux task ${taskId}: ${err?.message || err}`,
|
|
4367
|
+
);
|
|
4368
|
+
});
|
|
4369
|
+
}
|
|
4370
|
+
|
|
4371
|
+
activeTaskProcesses.delete(taskId);
|
|
4372
|
+
return true;
|
|
4373
|
+
}
|
|
4374
|
+
|
|
3779
4375
|
if (processRecord?.child) {
|
|
3780
4376
|
try {
|
|
3781
4377
|
if (typeof processRecord.child.kill === "function") {
|
|
@@ -3891,11 +4487,27 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
3891
4487
|
if ((!processRecord || !processRecord.child) && !ptyRecord) {
|
|
3892
4488
|
log(`Stop requested for task ${taskId}, but no active process found`);
|
|
3893
4489
|
sendStopAck(false);
|
|
4490
|
+
// Even when we have no in-memory record, the task may still own a
|
|
4491
|
+
// tmux session (e.g. the daemon was restarted between spawn and
|
|
4492
|
+
// stop, or the liveness reaper removed our entry but the session
|
|
4493
|
+
// is alive). Try a name-based orphan kill as a belt-and-suspenders
|
|
4494
|
+
// — fire-and-forget so we don't delay the ack response.
|
|
4495
|
+
killTmuxSessionsForDeletedTask(taskId).catch((error) => {
|
|
4496
|
+
logError(`Orphan tmux cleanup failed for task ${taskId}: ${error?.message || error}`);
|
|
4497
|
+
});
|
|
3894
4498
|
return;
|
|
3895
4499
|
}
|
|
3896
4500
|
|
|
3897
4501
|
sendStopAck(true);
|
|
3898
4502
|
stopActiveTaskProcess(taskId, { reason: payload?.reason });
|
|
4503
|
+
// Belt-and-suspenders: also sweep any tmux sessions matching this
|
|
4504
|
+
// task id that didn't make it into the active map (stale entries
|
|
4505
|
+
// from previous spawns, daemon restarts, etc.). `stopActiveTaskProcess`
|
|
4506
|
+
// already kills the session associated with the current active record;
|
|
4507
|
+
// this catches everything else with the same task-id prefix.
|
|
4508
|
+
killTmuxSessionsForDeletedTask(taskId).catch((error) => {
|
|
4509
|
+
logError(`Orphan tmux cleanup failed for task ${taskId}: ${error?.message || error}`);
|
|
4510
|
+
});
|
|
3899
4511
|
}
|
|
3900
4512
|
|
|
3901
4513
|
async function getProjectLocalPath(projectId) {
|
|
@@ -4329,13 +4941,28 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4329
4941
|
env.CONDUCTOR_BACKEND_URL = BACKEND_HTTP;
|
|
4330
4942
|
}
|
|
4331
4943
|
|
|
4332
|
-
const child =
|
|
4333
|
-
|
|
4944
|
+
const { child, tmuxSession } = spawnFireProcess({
|
|
4945
|
+
taskId,
|
|
4946
|
+
args,
|
|
4334
4947
|
env,
|
|
4335
|
-
|
|
4948
|
+
cwd: taskDir,
|
|
4949
|
+
logPath,
|
|
4336
4950
|
});
|
|
4337
4951
|
|
|
4338
|
-
|
|
4952
|
+
// In normal mode we rename the placeholder run-timestamp dir to embed
|
|
4953
|
+
// the freshly spawned child pid. We must NOT do this in tmux mode:
|
|
4954
|
+
// 1) child.pid is the short-lived tmux client, not Fire — misleading.
|
|
4955
|
+
// 2) Fire's stdout/stderr is redirected to logPath inside the tmux
|
|
4956
|
+
// `bash -c ...` command, which captures the path *before* this
|
|
4957
|
+
// rename runs. Renaming the parent dir afterwards would break that
|
|
4958
|
+
// redirection (file open would fail) and Fire would never start.
|
|
4959
|
+
if (
|
|
4960
|
+
!tmuxSession &&
|
|
4961
|
+
!boundPath &&
|
|
4962
|
+
runTimestampPart &&
|
|
4963
|
+
Number.isInteger(child?.pid) &&
|
|
4964
|
+
child.pid > 0
|
|
4965
|
+
) {
|
|
4339
4966
|
const desiredTaskDir = path.join(path.dirname(taskDir), `${runTimestampPart}_pid_${child.pid}`);
|
|
4340
4967
|
if (desiredTaskDir !== taskDir) {
|
|
4341
4968
|
try {
|
|
@@ -4378,6 +5005,8 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4378
5005
|
logPath,
|
|
4379
5006
|
stopForceKillTimer: null,
|
|
4380
5007
|
managedByFireBridge: true,
|
|
5008
|
+
tmuxSession: tmuxSession || null,
|
|
5009
|
+
tmuxMode: Boolean(tmuxSession),
|
|
4381
5010
|
});
|
|
4382
5011
|
|
|
4383
5012
|
client
|
|
@@ -4393,15 +5022,29 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4393
5022
|
logError(`Failed to report task status (RUNNING) for ${taskId}: ${err?.message || err}`);
|
|
4394
5023
|
});
|
|
4395
5024
|
|
|
4396
|
-
|
|
4397
|
-
|
|
4398
|
-
|
|
4399
|
-
|
|
4400
|
-
|
|
4401
|
-
|
|
4402
|
-
child.
|
|
4403
|
-
|
|
4404
|
-
|
|
5025
|
+
// In tmux mode the Fire process writes directly to logPath via the
|
|
5026
|
+
// shell redirection inside the tmux session, so the daemon does not
|
|
5027
|
+
// pipe stdout/stderr. We only attach the data listeners in normal mode.
|
|
5028
|
+
if (!tmuxSession) {
|
|
5029
|
+
if (child.stdout && typeof child.stdout.pipe === "function" && logStream) {
|
|
5030
|
+
child.stdout.pipe(logStream, { end: false });
|
|
5031
|
+
} else if (child.stdout && typeof child.stdout.on === "function" && logStream) {
|
|
5032
|
+
child.stdout.on("data", (chunk) => logStream.write(chunk));
|
|
5033
|
+
}
|
|
5034
|
+
if (child.stderr && typeof child.stderr.pipe === "function" && logStream) {
|
|
5035
|
+
child.stderr.pipe(logStream, { end: false });
|
|
5036
|
+
} else if (child.stderr && typeof child.stderr.on === "function" && logStream) {
|
|
5037
|
+
child.stderr.on("data", (chunk) => logStream.write(chunk));
|
|
5038
|
+
}
|
|
5039
|
+
} else if (child.stderr && typeof child.stderr.on === "function") {
|
|
5040
|
+
// Capture any error output emitted by the tmux client itself so
|
|
5041
|
+
// problems during session creation surface in daemon logs.
|
|
5042
|
+
child.stderr.on("data", (chunk) => {
|
|
5043
|
+
const text = chunk?.toString?.("utf8") ?? String(chunk ?? "");
|
|
5044
|
+
if (text.trim()) {
|
|
5045
|
+
logError(`tmux(${tmuxSession}) stderr: ${text.trim()}`);
|
|
5046
|
+
}
|
|
5047
|
+
});
|
|
4405
5048
|
}
|
|
4406
5049
|
|
|
4407
5050
|
child.on("error", (err) => {
|
|
@@ -4414,6 +5057,37 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4414
5057
|
|
|
4415
5058
|
child.on("exit", (code, signal) => {
|
|
4416
5059
|
const active = activeTaskProcesses.get(taskId);
|
|
5060
|
+
|
|
5061
|
+
// In tmux mode the `tmux new-session -d` client always exits
|
|
5062
|
+
// shortly after launching the Fire session. A clean exit (code 0,
|
|
5063
|
+
// no signal) just means the session was successfully created and
|
|
5064
|
+
// Fire is now running detached under the tmux server. We must NOT
|
|
5065
|
+
// remove the task from activeTaskProcesses or report a terminal
|
|
5066
|
+
// status in that case — Fire is still alive.
|
|
5067
|
+
if (active?.tmuxMode && !signal && code === 0) {
|
|
5068
|
+
log(`Fire launched in detached tmux session: ${active.tmuxSession || "(unknown)"}`);
|
|
5069
|
+
if (logStream) {
|
|
5070
|
+
const ts = new Date().toLocaleString("sv-SE", { timeZone: "Asia/Shanghai" }).replace(" ", "T");
|
|
5071
|
+
logStream.write(
|
|
5072
|
+
`[daemon ${ts}] tmux session ${active.tmuxSession || "?"} created; Fire detached from daemon\n`,
|
|
5073
|
+
);
|
|
5074
|
+
}
|
|
5075
|
+
return;
|
|
5076
|
+
}
|
|
5077
|
+
|
|
5078
|
+
// tmux mode + non-zero/signaled exit means `tmux new-session` itself
|
|
5079
|
+
// failed (duplicate session, invalid args, tmux server crashed, …).
|
|
5080
|
+
// Fire never started, so the bridge-managed assumption that "Fire
|
|
5081
|
+
// will report its own terminal status" no longer holds — force the
|
|
5082
|
+
// daemon to report failure so the backend doesn't get stuck on
|
|
5083
|
+
// RUNNING.
|
|
5084
|
+
if (active?.tmuxMode) {
|
|
5085
|
+
active.forceDaemonTerminalStatusReport = true;
|
|
5086
|
+
logError(
|
|
5087
|
+
`tmux session ${active.tmuxSession || "?"} for task ${taskId} failed to launch (code=${code}, signal=${signal || "null"})`,
|
|
5088
|
+
);
|
|
5089
|
+
}
|
|
5090
|
+
|
|
4417
5091
|
if (active?.stopForceKillTimer) {
|
|
4418
5092
|
clearTimeout(active.stopForceKillTimer);
|
|
4419
5093
|
}
|
|
@@ -4565,7 +5239,31 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4565
5239
|
let refreshStoppedActiveTask = false;
|
|
4566
5240
|
let startupTerminalStatusReported = false;
|
|
4567
5241
|
try {
|
|
4568
|
-
|
|
5242
|
+
let activeTarget = activeTaskProcesses.get(normalizedTargetTaskId);
|
|
5243
|
+
|
|
5244
|
+
// For tmux-mode entries, `activeTarget.child` is the long-dead
|
|
5245
|
+
// `tmux new-session` client object — truthy regardless of whether
|
|
5246
|
+
// the underlying tmux session (and Fire inside it) is still alive.
|
|
5247
|
+
// Probe the actual session and clean up stale entries on demand so
|
|
5248
|
+
// the restart gating below reflects reality, not the stale record.
|
|
5249
|
+
if (activeTarget?.tmuxMode && activeTarget.tmuxSession) {
|
|
5250
|
+
const sessionAlive = await tmuxSessionExists(activeTarget.tmuxSession);
|
|
5251
|
+
if (
|
|
5252
|
+
!sessionAlive &&
|
|
5253
|
+
activeTaskProcesses.get(normalizedTargetTaskId) === activeTarget
|
|
5254
|
+
) {
|
|
5255
|
+
log(
|
|
5256
|
+
`Tmux session ${activeTarget.tmuxSession} for task ${normalizedTargetTaskId} no longer exists; clearing stale activeTaskProcesses entry before restart`,
|
|
5257
|
+
);
|
|
5258
|
+
if (activeTarget.stopForceKillTimer) {
|
|
5259
|
+
clearTimeout(activeTarget.stopForceKillTimer);
|
|
5260
|
+
activeTarget.stopForceKillTimer = null;
|
|
5261
|
+
}
|
|
5262
|
+
activeTaskProcesses.delete(normalizedTargetTaskId);
|
|
5263
|
+
activeTarget = undefined;
|
|
5264
|
+
}
|
|
5265
|
+
}
|
|
5266
|
+
|
|
4569
5267
|
if (isRefreshSessionInplace) {
|
|
4570
5268
|
if (!activeTarget?.child) {
|
|
4571
5269
|
reportRestartFailure({
|
|
@@ -4579,12 +5277,16 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4579
5277
|
return;
|
|
4580
5278
|
}
|
|
4581
5279
|
} else if (activeTarget?.child) {
|
|
5280
|
+
// tmux-mode failures point at the tmux session, not a meaningless pid.
|
|
5281
|
+
const description = activeTarget.tmuxMode
|
|
5282
|
+
? `task already active in tmux session ${activeTarget.tmuxSession || "(unknown)"} — stop the task before restarting`
|
|
5283
|
+
: `task already active (pid=${activeTarget.child.pid ?? "unknown"})`;
|
|
4582
5284
|
reportRestartFailure({
|
|
4583
5285
|
taskId: normalizedTargetTaskId,
|
|
4584
5286
|
projectId: normalizedProjectId,
|
|
4585
5287
|
requestId,
|
|
4586
5288
|
mode: normalizedMode,
|
|
4587
|
-
error: new Error(
|
|
5289
|
+
error: new Error(description),
|
|
4588
5290
|
});
|
|
4589
5291
|
return;
|
|
4590
5292
|
}
|
|
@@ -4882,10 +5584,12 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4882
5584
|
env.CONDUCTOR_BACKEND_URL = BACKEND_HTTP;
|
|
4883
5585
|
}
|
|
4884
5586
|
|
|
4885
|
-
const child =
|
|
4886
|
-
|
|
5587
|
+
const { child, tmuxSession } = spawnFireProcess({
|
|
5588
|
+
taskId: normalizedTargetTaskId,
|
|
5589
|
+
args,
|
|
4887
5590
|
env,
|
|
4888
|
-
|
|
5591
|
+
cwd: taskDir,
|
|
5592
|
+
logPath,
|
|
4889
5593
|
});
|
|
4890
5594
|
|
|
4891
5595
|
let logStream;
|
|
@@ -4919,18 +5623,32 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4919
5623
|
logPath,
|
|
4920
5624
|
stopForceKillTimer: null,
|
|
4921
5625
|
managedByFireBridge: true,
|
|
5626
|
+
tmuxSession: tmuxSession || null,
|
|
5627
|
+
tmuxMode: Boolean(tmuxSession),
|
|
4922
5628
|
};
|
|
4923
5629
|
activeTaskProcesses.set(normalizedTargetTaskId, activeProcessRecord);
|
|
4924
5630
|
|
|
4925
|
-
|
|
4926
|
-
|
|
4927
|
-
|
|
4928
|
-
|
|
4929
|
-
|
|
4930
|
-
|
|
4931
|
-
child.
|
|
4932
|
-
|
|
4933
|
-
|
|
5631
|
+
// In tmux mode the Fire process writes directly to logPath via shell
|
|
5632
|
+
// redirection inside the tmux session, so the daemon does not pipe
|
|
5633
|
+
// stdout/stderr. We only attach the data listeners in normal mode.
|
|
5634
|
+
if (!tmuxSession) {
|
|
5635
|
+
if (child.stdout && typeof child.stdout.pipe === "function" && logStream) {
|
|
5636
|
+
child.stdout.pipe(logStream, { end: false });
|
|
5637
|
+
} else if (child.stdout && typeof child.stdout.on === "function" && logStream) {
|
|
5638
|
+
child.stdout.on("data", (chunk) => logStream.write(chunk));
|
|
5639
|
+
}
|
|
5640
|
+
if (child.stderr && typeof child.stderr.pipe === "function" && logStream) {
|
|
5641
|
+
child.stderr.pipe(logStream, { end: false });
|
|
5642
|
+
} else if (child.stderr && typeof child.stderr.on === "function" && logStream) {
|
|
5643
|
+
child.stderr.on("data", (chunk) => logStream.write(chunk));
|
|
5644
|
+
}
|
|
5645
|
+
} else if (child.stderr && typeof child.stderr.on === "function") {
|
|
5646
|
+
child.stderr.on("data", (chunk) => {
|
|
5647
|
+
const text = chunk?.toString?.("utf8") ?? String(chunk ?? "");
|
|
5648
|
+
if (text.trim()) {
|
|
5649
|
+
logError(`tmux(${tmuxSession}) stderr: ${text.trim()}`);
|
|
5650
|
+
}
|
|
5651
|
+
});
|
|
4934
5652
|
}
|
|
4935
5653
|
|
|
4936
5654
|
child.on("error", (err) => {
|
|
@@ -4943,6 +5661,33 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
4943
5661
|
|
|
4944
5662
|
child.on("exit", (code, signal) => {
|
|
4945
5663
|
const active = activeTaskProcesses.get(normalizedTargetTaskId);
|
|
5664
|
+
|
|
5665
|
+
// In tmux mode the `tmux new-session -d` client always exits soon
|
|
5666
|
+
// after launching the session. A clean exit (code 0, no signal) means
|
|
5667
|
+
// Fire is now running detached under the tmux server — keep the task
|
|
5668
|
+
// record and skip terminal-status reporting.
|
|
5669
|
+
if (active?.tmuxMode && !signal && code === 0) {
|
|
5670
|
+
log(`Fire restart launched in detached tmux session: ${active.tmuxSession || "(unknown)"}`);
|
|
5671
|
+
if (logStream) {
|
|
5672
|
+
const ts = new Date().toLocaleString("sv-SE", { timeZone: "Asia/Shanghai" }).replace(" ", "T");
|
|
5673
|
+
logStream.write(
|
|
5674
|
+
`[daemon ${ts}] tmux session ${active.tmuxSession || "?"} created; Fire detached from daemon\n`,
|
|
5675
|
+
);
|
|
5676
|
+
}
|
|
5677
|
+
return;
|
|
5678
|
+
}
|
|
5679
|
+
|
|
5680
|
+
// tmux mode + non-zero/signaled exit means the `tmux new-session` call
|
|
5681
|
+
// failed before Fire could start (e.g. duplicate session for the same
|
|
5682
|
+
// task id, tmux server crashed). Force the daemon to report failure so
|
|
5683
|
+
// the backend doesn't get stuck on RUNNING.
|
|
5684
|
+
if (active?.tmuxMode) {
|
|
5685
|
+
active.forceDaemonTerminalStatusReport = true;
|
|
5686
|
+
logError(
|
|
5687
|
+
`tmux session ${active.tmuxSession || "?"} for restart of task ${normalizedTargetTaskId} failed to launch (code=${code}, signal=${signal || "null"})`,
|
|
5688
|
+
);
|
|
5689
|
+
}
|
|
5690
|
+
|
|
4946
5691
|
if (active?.stopForceKillTimer) {
|
|
4947
5692
|
clearTimeout(active.stopForceKillTimer);
|
|
4948
5693
|
}
|
|
@@ -5056,10 +5801,15 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
5056
5801
|
|
|
5057
5802
|
closePromise = (async () => {
|
|
5058
5803
|
daemonShuttingDown = true;
|
|
5804
|
+
daemonShutdownInProgress = true;
|
|
5059
5805
|
if (watchdogTimer) {
|
|
5060
5806
|
clearInterval(watchdogTimer);
|
|
5061
5807
|
watchdogTimer = null;
|
|
5062
5808
|
}
|
|
5809
|
+
if (tmuxLivenessTimer) {
|
|
5810
|
+
clearInterval(tmuxLivenessTimer);
|
|
5811
|
+
tmuxLivenessTimer = null;
|
|
5812
|
+
}
|
|
5063
5813
|
const activeProcessEntries = [...activeTaskProcesses.entries()];
|
|
5064
5814
|
const activePtyEntries = [...activePtySessions.entries()];
|
|
5065
5815
|
const activeEntries = [...activeProcessEntries, ...activePtyEntries];
|
|
@@ -5097,6 +5847,16 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
5097
5847
|
if (record?.stopForceKillTimer) {
|
|
5098
5848
|
clearTimeout(record.stopForceKillTimer);
|
|
5099
5849
|
}
|
|
5850
|
+
// In tmux mode the Fire process runs detached under the tmux server.
|
|
5851
|
+
// Daemon shutdown must NOT terminate those Fire processes — that's
|
|
5852
|
+
// the whole purpose of the mode. Skip the kill and let them keep
|
|
5853
|
+
// running.
|
|
5854
|
+
if (record?.tmuxMode) {
|
|
5855
|
+
log(
|
|
5856
|
+
`Daemon shutting down: leaving tmux-detached Fire task ${taskId} (session=${record.tmuxSession || "?"}) running`,
|
|
5857
|
+
);
|
|
5858
|
+
continue;
|
|
5859
|
+
}
|
|
5100
5860
|
try {
|
|
5101
5861
|
if (typeof record.child?.kill === "function") {
|
|
5102
5862
|
record.child.kill("SIGTERM");
|