@triflux/remote 10.0.0-alpha.1 → 10.0.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hub/index.mjs +21 -0
- package/hub/pipe.mjs +98 -13
- package/hub/server.mjs +1245 -1124
- package/hub/store-adapter.mjs +14 -747
- package/hub/store.mjs +4 -44
- package/hub/team/backend.mjs +1 -1
- package/hub/team/cli/services/hub-client.mjs +38 -19
- package/hub/team/cli/services/native-control.mjs +1 -1
- package/hub/team/conductor.mjs +671 -0
- package/hub/team/event-log.mjs +76 -0
- package/hub/team/headless.mjs +8 -6
- package/hub/team/health-probe.mjs +272 -0
- package/hub/team/launcher-template.mjs +95 -0
- package/hub/team/lead-control.mjs +104 -0
- package/hub/team/nativeProxy.mjs +9 -2
- package/hub/team/notify.mjs +293 -0
- package/hub/team/pane.mjs +1 -1
- package/hub/team/process-cleanup.mjs +342 -0
- package/hub/team/psmux.mjs +1 -1
- package/hub/team/remote-probe.mjs +276 -0
- package/hub/team/remote-watcher.mjs +478 -0
- package/hub/team/session-sync.mjs +169 -0
- package/hub/team/staleState.mjs +1 -1
- package/hub/team/swarm-hypervisor.mjs +554 -0
- package/hub/team/swarm-locks.mjs +204 -0
- package/hub/team/swarm-planner.mjs +256 -0
- package/hub/team/swarm-reconciler.mjs +137 -0
- package/hub/team/tui-remote-adapter.mjs +393 -0
- package/hub/team/tui.mjs +206 -2
- package/hub/team/worktree-lifecycle.mjs +172 -0
- package/hub/tools.mjs +94 -12
- package/hub/tray.mjs +1 -1
- package/hub/workers/codex-mcp.mjs +8 -2
- package/hub/workers/gemini-worker.mjs +2 -1
- package/package.json +1 -1
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
// hub/team/conductor.mjs — 세션 오케스트레이션 Conductor
|
|
2
|
+
// native-supervisor.mjs의 spawn/kill을 래핑하되, 상태 머신 + health probe +
|
|
3
|
+
// auto-restart + event log를 추가하여 "조용한 실패"를 구조적으로 불가능하게 만든다.
|
|
4
|
+
//
|
|
5
|
+
// 기존 native-supervisor와의 차이:
|
|
6
|
+
// 1. 상태 머신 (alive/dead → 7 states + 2 terminal)
|
|
7
|
+
// 2. Health probe 4단계 (+ INPUT_WAIT 감지)
|
|
8
|
+
// 3. Auto-restart (maxRestarts=3)
|
|
9
|
+
// 4. JSONL event log (블랙박스 리코더)
|
|
10
|
+
|
|
11
|
+
import { spawn, execFile } from 'node:child_process';
|
|
12
|
+
import { dirname, join } from 'node:path';
|
|
13
|
+
import { homedir } from 'node:os';
|
|
14
|
+
import { mkdirSync, createWriteStream, readFileSync, copyFileSync } from 'node:fs';
|
|
15
|
+
import { EventEmitter } from 'node:events';
|
|
16
|
+
|
|
17
|
+
import { killProcess, IS_WINDOWS } from '@triflux/core/hub/platform.mjs';
|
|
18
|
+
import { createEventLog } from './event-log.mjs';
|
|
19
|
+
import { createHealthProbe } from './health-probe.mjs';
|
|
20
|
+
import { createRemoteProbe } from './remote-probe.mjs';
|
|
21
|
+
import { buildLauncher } from './launcher-template.mjs';
|
|
22
|
+
import { broker } from '@triflux/core/hub/account-broker.mjs';
|
|
23
|
+
|
|
24
|
+
/**
 * Session lifecycle states.
 * Each key maps to its lower-case string value (e.g. `INPUT_WAIT` → `'input_wait'`).
 */
export const STATES = Object.freeze(
  Object.fromEntries(
    [
      'INIT',
      'STARTING',
      'HEALTHY',
      'STALLED',
      'INPUT_WAIT',
      'FAILED',
      'RESTARTING',
      'DEAD',
      'COMPLETED',
    ].map((name) => [name, name.toLowerCase()]),
  ),
);
|
|
36
|
+
|
|
37
|
+
/** Table of valid transitions: current state → states it may move to. */
const TRANSITIONS = Object.freeze(
  Object.fromEntries([
    [STATES.INIT, [STATES.STARTING]],
    [STATES.STARTING, [STATES.HEALTHY, STATES.FAILED]],
    [STATES.HEALTHY, [STATES.STALLED, STATES.INPUT_WAIT, STATES.FAILED, STATES.COMPLETED]],
    [STATES.STALLED, [STATES.HEALTHY, STATES.FAILED]],
    [STATES.INPUT_WAIT, [STATES.HEALTHY, STATES.FAILED]],
    [STATES.FAILED, [STATES.RESTARTING, STATES.DEAD]],
    [STATES.RESTARTING, [STATES.STARTING]],
    [STATES.DEAD, []],
    [STATES.COMPLETED, []],
  ]),
);

// Terminal states are exactly the ones with no outgoing transition (DEAD, COMPLETED).
const TERMINAL_STATES = new Set(
  Object.keys(TRANSITIONS).filter((state) => TRANSITIONS[state].length === 0),
);

/** Default number of automatic restarts before a session is declared dead. */
const DEFAULT_MAX_RESTARTS = 3;

/** Default shutdown grace period in milliseconds. */
const DEFAULT_GRACE_MS = 10 * 1000;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Conductor 팩토리.
|
|
56
|
+
* @param {object} opts
|
|
57
|
+
* @param {string} opts.logsDir — 이벤트 로그 디렉토리
|
|
58
|
+
* @param {number} [opts.maxRestarts=3]
|
|
59
|
+
* @param {number} [opts.graceMs=10000] — shutdown grace period
|
|
60
|
+
* @param {object} [opts.probeOpts] — health-probe 옵션 오버라이드
|
|
61
|
+
* @returns {Conductor}
|
|
62
|
+
*/
|
|
63
|
+
export function createConductor(opts = {}) {
|
|
64
|
+
const {
|
|
65
|
+
logsDir,
|
|
66
|
+
maxRestarts = DEFAULT_MAX_RESTARTS,
|
|
67
|
+
graceMs = DEFAULT_GRACE_MS,
|
|
68
|
+
probeOpts = {},
|
|
69
|
+
} = opts;
|
|
70
|
+
|
|
71
|
+
if (!logsDir) throw new Error('logsDir is required');
|
|
72
|
+
mkdirSync(logsDir, { recursive: true });
|
|
73
|
+
|
|
74
|
+
const emitter = new EventEmitter();
|
|
75
|
+
const sessions = new Map();
|
|
76
|
+
let shuttingDown = false;
|
|
77
|
+
|
|
78
|
+
// 공유 event log (모든 세션 이벤트를 하나의 JSONL에)
|
|
79
|
+
const eventLog = createEventLog(join(logsDir, 'conductor-events.jsonl'));
|
|
80
|
+
|
|
81
|
+
/**
 * Move a session to another state if the transition table permits it.
 *
 * A rejected move is recorded as an `invalid_transition` event and leaves the
 * session untouched. An accepted move is recorded as a `stateChange` event,
 * announced on the emitter, and stops the session's probe when the new state
 * is terminal.
 *
 * @param {object} session
 * @param {string} nextState
 * @param {string} [reason]
 * @returns {boolean} true when the state was changed, false when rejected
 */
function transition(session, nextState, reason = '') {
  const from = session.state;
  const allowed = TRANSITIONS[from] || [];

  if (allowed.indexOf(nextState) === -1) {
    eventLog.append('invalid_transition', {
      session: session.id,
      from,
      to: nextState,
      reason,
    });
    return false;
  }

  session.state = nextState;

  eventLog.append('stateChange', {
    session: session.id,
    from,
    to: nextState,
    reason,
    restarts: session.restarts,
  });
  emitter.emit('stateChange', { sessionId: session.id, from, to: nextState, reason });

  // Terminal states never leave, so health probing is pointless from here on.
  if (TERMINAL_STATES.has(nextState)) session.probe?.stop();

  return true;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Forcibly terminate a process by pid.
 * Delegates to `killProcess` with SIGKILL, tree and force options and a 5 s
 * timeout. Does nothing for a missing, zero or negative pid.
 *
 * @param {number} pid
 */
function forceKill(pid) {
  if (pid && !(pid <= 0)) {
    killProcess(pid, { signal: 'SIGKILL', tree: true, force: true, timeout: 5000 });
  }
}
|
|
128
|
+
|
|
129
|
+
/**
 * Kill a remote session's psmux session over SSH.
 * Fire-and-forget: the ssh result is ignored and failures are not propagated.
 *
 * The SSH user and address come from `session.config`, optionally completed
 * from `<repoRoot>/references/hosts.json` (`ssh_user`, `tailscale.ip`). When
 * no SSH user can be determined nothing is executed.
 *
 * The psmux session that gets killed is `config.sessionName || session.id`,
 * the same name the remote probe monitors, so a custom `sessionName` is
 * honoured instead of killing a session named after the conductor id.
 *
 * @param {object} session
 */
function killRemoteSession(session) {
  const { host, sessionName } = session.config;
  if (!host) return;

  let sshUser = session.config.sshUser;
  let sshIp = host;

  // Resolve ssh_user / IP from hosts.json when it is available.
  try {
    const hostsPath = join(opts.repoRoot || process.cwd(), 'references', 'hosts.json');
    const hosts = JSON.parse(readFileSync(hostsPath, 'utf8'));
    const hostCfg = hosts.hosts?.[host];
    if (hostCfg) {
      sshUser = sshUser || hostCfg.ssh_user;
      sshIp = hostCfg.tailscale?.ip || host;
    }
  } catch { /* no readable hosts.json — fall back to the config values */ }

  if (!sshUser) return;

  const psmuxSession = sessionName || session.id;
  const execFn = opts.deps?.execFile || execFile;

  // NOTE(review): ssh passes the joined arguments to the remote shell — confirm
  // that session names can never contain shell metacharacters.
  execFn(
    'ssh',
    [`${sshUser}@${sshIp}`, 'psmux', 'kill-session', '-t', psmuxSession],
    { timeout: 10_000 },
    () => {},
  );
  eventLog.append('remote_kill', { session: session.id, host, sshUser, sshIp });
}
|
|
154
|
+
|
|
155
|
+
/**
 * Tear down whatever is running for a single session.
 *
 * Remote sessions are cleaned up with a psmux kill-session over SSH
 * (fire-and-forget). Local sessions get SIGTERM, then up to `graceMs` to
 * exit, after which the process is force-killed.
 *
 * A child that has already exited is left alone: its `exit` event will never
 * fire again, so waiting would always burn the whole grace period and then
 * force-kill a pid that may since have been reused by another process.
 *
 * @param {object} session
 * @returns {Promise<void>} resolves once the child has exited or been force-killed
 */
async function cleanupChild(session) {
  session.probe?.stop();

  // Remote session — psmux kill-session over SSH.
  if (session.config.remote) {
    killRemoteSession(session);
    return;
  }

  const child = session.child;
  if (!child) return;

  const pid = child.pid;
  if (!pid) return;

  // exitCode / signalCode stay null while the process is still running.
  const hasExited =
    (child.exitCode ?? null) !== null || (child.signalCode ?? null) !== null;
  if (hasExited) return;

  // Ask politely first.
  try { child.kill('SIGTERM'); } catch { /* already dead */ }

  // Wait for the exit, escalating to a force kill when the grace period ends.
  await new Promise((resolve) => {
    const onExit = () => {
      clearTimeout(timer);
      resolve();
    };
    const timer = setTimeout(() => {
      child.off?.('exit', onExit);
      forceKill(pid);
      resolve();
    }, graceMs);
    timer.unref?.();
    child.once('exit', onExit);
  });
}
|
|
190
|
+
|
|
191
|
+
/**
 * Health-probe callback: decide which state transition, if any, a probe
 * result implies for the session.
 *
 * Results are ignored for sessions in a terminal state, in INIT or in
 * RESTARTING. Every other result is appended to the event log as `health`.
 *
 * @param {object} session
 * @param {object} result — probe result; the fields read here are `l0`, `l1`,
 *   `l3` and `inputWaitPattern`
 */
function handleProbeResult(session, result) {
  if (TERMINAL_STATES.has(session.state)) return;
  if (session.state === STATES.INIT || session.state === STATES.RESTARTING) return;

  eventLog.append('health', {
    session: session.id,
    ...result,
  });

  // L0 failure — local sessions are handled by the child's exit handler;
  // for remote sessions the probe is the only way to notice.
  if (result.l0 === 'fail') {
    if (session.config.remote) {
      handleFailure(session, 'remote_L0_fail');
    }
    return;
  }

  // L3 completed (remote completion token detected).
  if (result.l3 === 'completed' && session.config.remote) {
    // COMPLETED is only reachable from HEALTHY. A session that finishes while
    // still STARTING / STALLED / INPUT_WAIT is routed through HEALTHY first;
    // otherwise the state would never become terminal and every later probe
    // would announce completion again.
    if (session.state !== STATES.HEALTHY) {
      transition(session, STATES.HEALTHY, 'remote_completion_token');
    }
    if (!transition(session, STATES.COMPLETED, 'remote_completion_token')) return;

    emitter.emit('completed', { sessionId: session.id });
    if (typeof session.config.onCompleted === 'function') {
      session.config.onCompleted({ sessionId: session.id });
    }
    maybeAutoShutdown();
    return;
  }

  // L1 INPUT_WAIT detected.
  if (result.l1 === 'input_wait' && session.state === STATES.HEALTHY) {
    transition(session, STATES.INPUT_WAIT, `input_wait:${result.inputWaitPattern}`);
    emitter.emit('inputWait', {
      sessionId: session.id,
      pattern: result.inputWaitPattern,
    });
    return;
  }

  // INPUT_WAIT → back to HEALTHY once output resumes.
  if (session.state === STATES.INPUT_WAIT && result.l1 === 'ok') {
    transition(session, STATES.HEALTHY, 'output_resumed');
    return;
  }

  // L1 stall.
  if (result.l1 === 'stall' && session.state === STATES.HEALTHY) {
    transition(session, STATES.STALLED, 'L1_stall');
    return;
  }

  // STALLED → back to HEALTHY once output resumes.
  if (session.state === STATES.STALLED && result.l1 === 'ok') {
    transition(session, STATES.HEALTHY, 'output_resumed');
    return;
  }

  // L3 timeout while still STARTING.
  if (result.l3 === 'timeout' && session.state === STATES.STARTING) {
    handleFailure(session, 'L3_timeout');
    return;
  }

  // STARTING with L0 ok and L3 ok → HEALTHY.
  if (session.state === STATES.STARTING && result.l0 === 'ok' && result.l3 === 'ok') {
    transition(session, STATES.HEALTHY, 'probe_healthy');
    return;
  }

  // STARTING with L0 ok but L3 undecided → stay in STARTING and keep waiting.
}
|
|
264
|
+
|
|
265
|
+
/**
 * Handle a session failure: schedule a restart while the restart budget
 * lasts, otherwise declare the session DEAD.
 *
 * The failure is dropped when the session cannot legally enter FAILED
 * (already terminal, or a restart is already in flight). Without that guard
 * the child exiting during a restart's own cleanup would count as a second
 * failure, bump `restarts` again and launch a concurrent respawn.
 *
 * On final death the broker lease, if any, is released as failed, and the
 * account is marked rate-limited for five minutes when the last failure mode
 * was `rate_limited`.
 *
 * @param {object} session
 * @param {string} reason
 */
function handleFailure(session, reason) {
  if (TERMINAL_STATES.has(session.state)) return;
  if (!transition(session, STATES.FAILED, reason)) return;

  if (session.restarts < maxRestarts) {
    transition(session, STATES.RESTARTING, `restart_${session.restarts + 1}/${maxRestarts}`);
    session.restarts += 1;
    // Fire-and-forget: respawnSession reports its own failures via handleFailure.
    void respawnSession(session);
    return;
  }

  transition(session, STATES.DEAD, `maxRestarts(${maxRestarts})_exceeded`);
  emitter.emit('dead', { sessionId: session.id, reason });

  // Release the broker lease on final death.
  if (broker && session.config.accountId) {
    broker.release(session.config.accountId, { ok: false, failureMode: session.lastFailureMode });
    if (session.lastFailureMode === 'rate_limited') {
      broker.markRateLimited(session.config.accountId, 5 * 60 * 1000);
    }
  }
}
|
|
290
|
+
|
|
291
|
+
/**
 * (Re)start a session's local child process.
 *
 * Steps: clean up the previous child, enter STARTING, open the append-mode
 * log files, spawn the launcher command through a shell, wire output
 * tracking plus exit/error handling, then start a fresh health probe.
 *
 * Differences from a naive spawn:
 *  - nothing is spawned when the STARTING transition is rejected (e.g. a
 *    concurrent respawn already ran), which would otherwise leave two
 *    children attached to one session;
 *  - log streams get `'error'` listeners and are closed when spawn throws;
 *  - exit/error events of a child that is no longer `session.child` are
 *    logged but do not change the session.
 *
 * @param {object} session
 * @returns {Promise<void>}
 */
async function respawnSession(session) {
  // Clean up the previous child first.
  await cleanupChild(session);

  const startReason = session.restarts > 0 ? 'respawn' : 'initial';
  if (!transition(session, STATES.STARTING, startReason)) return;

  const launcher = session.launcher;
  const outPath = join(logsDir, `${session.id}.out.log`);
  const errPath = join(logsDir, `${session.id}.err.log`);
  mkdirSync(logsDir, { recursive: true });

  const outWs = createWriteStream(outPath, { flags: 'a' });
  const errWs = createWriteStream(errPath, { flags: 'a' });

  // An 'error' event without a listener would crash the whole process.
  const logStreamError = (stream) => (err) => {
    eventLog.append('log_stream_error', { session: session.id, stream, error: err.message });
  };
  outWs.on('error', logStreamError('out'));
  errWs.on('error', logStreamError('err'));

  const closeLogs = () => {
    try { outWs.end(); } catch { /* ignore */ }
    try { errWs.end(); } catch { /* ignore */ }
  };

  let outputBytes = 0;
  let recentOutput = '';

  let child;
  try {
    child = spawn(launcher.command, {
      shell: true,
      env: { ...process.env, ...launcher.env, ...(session.config.env || {}) },
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
  } catch (err) {
    closeLogs();
    eventLog.append('spawn_error', { session: session.id, error: err.message });
    handleFailure(session, `spawn_error:${err.message}`);
    return;
  }

  session.child = child;
  session.outPath = outPath;
  session.errPath = errPath;

  eventLog.append('spawn', {
    session: session.id,
    agent: session.config.agent,
    pid: child.pid,
    command: launcher.command,
    restart: session.restarts,
  });

  // stdout and stderr are tracked together so a child that only writes to
  // stderr still counts as making progress.
  const trackOutput = (buf) => {
    outputBytes += buf.length;
    // Keep only the most recent 2 KB (used for INPUT_WAIT pattern detection).
    recentOutput += String(buf);
    if (recentOutput.length > 2048) {
      recentOutput = recentOutput.slice(-2048);
    }
  };

  child.stdout?.on('data', (buf) => { outWs.write(buf); trackOutput(buf); });
  child.stderr?.on('data', (buf) => { errWs.write(buf); trackOutput(buf); });

  child.on('exit', (code, signal) => {
    closeLogs();

    eventLog.append('exit', {
      session: session.id,
      code,
      signal,
      restart: session.restarts,
    });

    // A late exit from a superseded child must not affect its replacement.
    if (session.child !== child) return;

    session.alive = false;
    if (TERMINAL_STATES.has(session.state)) return;

    if (code === 0 && !signal) {
      transition(session, STATES.COMPLETED, 'exit_0');
      emitter.emit('completed', { sessionId: session.id });
      if (typeof session.config.onCompleted === 'function') {
        session.config.onCompleted({ sessionId: session.id });
      }
      if (broker && session.config.accountId) {
        broker.release(session.config.accountId, { ok: true });
      }
    } else {
      // Detect rate limiting from the recent output before handling the failure.
      if (/(rate.?limit|quota|throttl|too.many.requests|429|usage.limit)/ui.test(recentOutput)) {
        session.lastFailureMode = 'rate_limited';
      }
      handleFailure(session, `exit_code:${code},signal:${signal}`);
    }

    maybeAutoShutdown();
  });

  child.on('error', (err) => {
    eventLog.append('child_error', { session: session.id, error: err.message });
    if (session.child !== child) return;

    session.alive = false;
    if (!TERMINAL_STATES.has(session.state)) {
      handleFailure(session, `child_error:${err.message}`);
    }
  });

  session.alive = true;

  // Set up the health probe for the new child.
  session.probe?.stop();
  const probe = createHealthProbe(
    {
      get pid() { return child.pid; },
      get alive() { return session.alive; },
      getOutputBytes: () => outputBytes,
      getRecentOutput: () => recentOutput,
    },
    {
      ...probeOpts,
      onProbe: (result) => handleProbeResult(session, result),
    },
  );
  session.probe = probe;
  probe.start();
}
|
|
412
|
+
|
|
413
|
+
/**
 * Start tracking a remote session. No child process is spawned; a remote
 * probe is created for the session's host / pane / psmux session instead.
 * The psmux session is assumed to have been created already on the remote
 * side (by remote-spawn.mjs).
 *
 * Defaults: the psmux session name falls back to the session id, and the
 * pane target falls back to `<session name>:0.0`.
 *
 * @param {object} session
 */
function startRemoteSession(session) {
  transition(session, STATES.STARTING, 'remote_initial');

  const cfg = session.config;
  const host = cfg.host;
  const remoteName = cfg.sessionName || session.id;
  const pane = cfg.paneTarget || `${remoteName}:0.0`;

  eventLog.append('remote_start', {
    session: session.id,
    host,
    paneTarget: pane,
    sessionName: remoteName,
  });

  session.alive = true;

  // Replace any previous probe with a fresh remote one.
  session.probe?.stop();
  const target = { host, paneTarget: pane, sessionName: remoteName };
  const options = {
    ...probeOpts,
    onProbe: (result) => handleProbeResult(session, result),
  };
  session.probe = createRemoteProbe(target, options);
  session.probe.start();
}
|
|
449
|
+
|
|
450
|
+
/**
 * Emit `allCompleted` when at least one session exists and every session is
 * in a terminal state. Does nothing while a shutdown is already in progress.
 */
function maybeAutoShutdown() {
  if (shuttingDown) return;
  if (sessions.size === 0) return;

  for (const session of sessions.values()) {
    if (!TERMINAL_STATES.has(session.state)) return;
  }

  emitter.emit('allCompleted');
}
|
|
462
|
+
|
|
463
|
+
// ── Public API ──────────────────────────────────────────────
|
|
464
|
+
|
|
465
|
+
/**
 * Register and start a new session.
 *
 * Local sessions lease an account from the broker when one is configured;
 * the lease may override the profile, add environment variables and supply
 * an auth file that is copied into the agent's credential location. Remote
 * sessions skip the broker and the launcher because they are already running
 * on the remote host.
 *
 * When the broker has no account to lease, the session is NOT created: a
 * `broker_no_lease` event is logged and the id is returned anyway (PoC
 * behavior, preserved).
 *
 * @param {object} config
 * @param {string} config.id — session ID (unique)
 * @param {'codex'|'gemini'|'claude'} config.agent
 * @param {string} config.prompt
 * @param {string} [config.profile]
 * @param {string} [config.workdir]
 * @param {string} [config.model]
 * @param {boolean} [config.remote=false] — whether this is a remote session
 * @param {string} [config.host] — SSH host (required when remote=true)
 * @param {string} [config.paneTarget] — psmux pane target (remote only)
 * @param {string} [config.sessionName] — psmux session name (remote only)
 * @param {function} [config.onCompleted] — completion callback ({sessionId}) => void
 * @returns {string} session ID
 * @throws {Error} when shutting down, when the id is missing or already in
 *   use, when a remote session has no host, or when building the launcher fails
 */
function spawnSession(config) {
  if (shuttingDown) throw new Error('Conductor is shutting down');
  if (!config.id) throw new Error('session id is required');
  if (sessions.has(config.id)) throw new Error(`Session "${config.id}" already exists`);
  if (config.remote && !config.host) throw new Error('host is required for remote sessions');

  // Broker lease (graceful — broker is null when accounts.json is absent).
  let lease = null;
  if (broker && config.agent && !config.remote) {
    lease = broker.lease({ provider: config.agent });
    if (lease === null) {
      const eta = broker.nextAvailableEta(config.agent);
      eventLog.append('broker_no_lease', {
        session: config.id,
        agent: config.agent,
        eta: eta ? new Date(eta).toISOString() : 'unknown',
      });
      // PoC: skip the session when every account is in cooldown.
      return config.id;
    }
  }

  // Apply the lease's profile/env/account to a copy of the config.
  const resolvedConfig = lease
    ? {
      ...config,
      profile: lease.profile ?? config.profile,
      env: { ...(config.env || {}), ...(lease.env || {}) },
      accountId: lease.id,
    }
    : config;

  // Auth file copy — the broker resolved the absolute path, the conductor
  // performs the copy. Destinations are looked up per agent so that an agent
  // without a known credential location (e.g. claude) never overwrites
  // another agent's credentials.
  if (lease?.mode === 'auth' && lease.authFile) {
    const authTargets = new Map([
      ['codex', [['.codex', 'auth.json']]],
      ['gemini', [['.gemini', 'oauth_creds.json'], ['.gemini', 'gemini-credentials.json']]],
    ]);
    const targets = authTargets.get(config.agent);
    if (!targets) {
      eventLog.append('auth_copy_skipped', { session: config.id, agent: config.agent });
    }
    for (const parts of targets ?? []) {
      const dest = join(homedir(), ...parts);
      try {
        mkdirSync(dirname(dest), { recursive: true });
        copyFileSync(lease.authFile, dest);
        eventLog.append('auth_copy', { session: config.id, agent: config.agent, dest });
      } catch (err) {
        eventLog.append('auth_copy_error', { session: config.id, dest, error: err.message });
      }
    }
  }

  // Remote sessions need no launcher (already running remotely).
  let launcher = null;
  if (!resolvedConfig.remote) {
    try {
      launcher = buildLauncher({
        agent: resolvedConfig.agent,
        profile: resolvedConfig.profile,
        prompt: resolvedConfig.prompt,
        workdir: resolvedConfig.workdir,
        model: resolvedConfig.model,
      });
    } catch (err) {
      // The session never starts, so hand the leased account back.
      if (lease) broker.release(lease.id, { ok: false });
      throw err;
    }
  }

  const session = {
    id: resolvedConfig.id,
    config: resolvedConfig,
    launcher,
    state: STATES.INIT,
    child: null,
    probe: null,
    alive: false,
    restarts: 0,
    outPath: null,
    errPath: null,
    createdAt: Date.now(),
  };

  sessions.set(resolvedConfig.id, session);

  if (resolvedConfig.remote) {
    startRemoteSession(session);
  } else {
    // Fire-and-forget: spawn failures are routed through handleFailure.
    void respawnSession(session);
  }
  return resolvedConfig.id;
}
|
|
566
|
+
|
|
567
|
+
/**
 * Kill a session and mark it DEAD. Unknown or already-terminal sessions are
 * ignored.
 *
 * The session is moved to DEAD *before* its child is cleaned up. The child's
 * exit handler skips sessions in a terminal state; if the state were still
 * live when the SIGTERM'd child exits, that handler would treat the exit as
 * a failure and restart the session that was just killed.
 *
 * @param {string} id
 * @param {string} [reason]
 * @returns {Promise<void>}
 */
async function killSession(id, reason = 'user_kill') {
  const session = sessions.get(id);
  if (!session) return;
  if (TERMINAL_STATES.has(session.state)) return;

  eventLog.append('kill', { session: id, reason });

  const markDead = () => {
    if (TERMINAL_STATES.has(session.state)) return;
    transition(session, STATES.FAILED, reason);
    transition(session, STATES.DEAD, reason);
  };

  markDead();
  await cleanupChild(session);
  // Retry in case the state at call time did not allow FAILED → DEAD yet.
  markDead();
}
|
|
582
|
+
|
|
583
|
+
/**
 * Write a line of input to a local session's stdin (used to resolve
 * INPUT_WAIT). A newline is appended to the text.
 *
 * Remote sessions are not supported here (psmux send-keys is a separate
 * path); the attempt is logged as `stdin_remote_unsupported`.
 *
 * Input is refused when stdin is missing, destroyed or no longer writable,
 * rather than writing into a closed pipe. Non-string `text` is coerced with
 * `String()` so that a successful write is also reported as successful.
 *
 * @param {string} id
 * @param {string} text
 * @returns {boolean} true when the text was written, false otherwise
 */
function sendInput(id, text) {
  const session = sessions.get(id);
  if (!session) return false;

  if (session.config.remote) {
    eventLog.append('stdin_remote_unsupported', { session: id });
    return false;
  }

  const stdin = session.child?.stdin;
  if (!stdin || stdin.destroyed || stdin.writable === false) return false;

  const payload = String(text);
  try {
    stdin.write(`${payload}\n`);
    // Only the first 100 characters are recorded in the event log.
    eventLog.append('stdin', { session: id, text: payload.slice(0, 100) });
    return true;
  } catch {
    return false;
  }
}
|
|
607
|
+
|
|
608
|
+
/**
 * Build a point-in-time summary of every session.
 * Missing pid, host and health values are reported as null, and a missing
 * `remote` flag as false.
 *
 * @returns {object[]} one plain object per session, in insertion order
 */
function getSnapshot() {
  const snapshot = [];
  for (const session of sessions.values()) {
    const { config, child, probe } = session;
    snapshot.push({
      id: session.id,
      agent: config.agent,
      state: session.state,
      pid: child?.pid || null,
      remote: config.remote || false,
      host: config.host || null,
      restarts: session.restarts,
      health: probe?.getStatus() || null,
      outPath: session.outPath,
      errPath: session.errPath,
      createdAt: session.createdAt,
    });
  }
  return snapshot;
}
|
|
627
|
+
|
|
628
|
+
/**
 * Graceful shutdown: stop every non-terminal session, flush and close the
 * event log, then emit `shutdown`. Repeated calls are no-ops.
 *
 * Each session is marked DEAD before its child is cleaned up, so the child's
 * exit handler (which skips terminal sessions) cannot interpret the
 * resulting exit as a failure and respawn the session mid-shutdown.
 *
 * The SIGINT/SIGTERM listeners registered by this conductor are removed once
 * cleanup has finished, so finished conductors do not accumulate listeners
 * on `process`.
 *
 * @param {string} [reason]
 * @returns {Promise<void>}
 */
async function shutdown(reason = 'shutdown') {
  if (shuttingDown) return;
  shuttingDown = true;

  eventLog.append('shutdown', { reason, sessions: sessions.size });

  const markDead = (s) => {
    if (TERMINAL_STATES.has(s.state)) return;
    transition(s, STATES.FAILED, reason);
    transition(s, STATES.DEAD, reason);
  };

  const cleanups = [...sessions.values()]
    .filter((s) => !TERMINAL_STATES.has(s.state))
    .map(async (s) => {
      s.probe?.stop();
      markDead(s);
      await cleanupChild(s);
      // Retry in case the state at call time did not allow FAILED → DEAD yet.
      markDead(s);
    });

  try {
    await Promise.allSettled(cleanups);
    await eventLog.flush();
    await eventLog.close();
  } finally {
    // Kept until here so a second signal during cleanup is still absorbed.
    process.off('SIGINT', onSignal);
    process.off('SIGTERM', onSignal);
  }

  emitter.emit('shutdown');
}
|
|
653
|
+
|
|
654
|
+
// Shutdown traps
|
|
655
|
+
const onSignal = () => { void shutdown('signal'); };
|
|
656
|
+
process.on('SIGINT', onSignal);
|
|
657
|
+
process.on('SIGTERM', onSignal);
|
|
658
|
+
|
|
659
|
+
return Object.freeze({
|
|
660
|
+
spawnSession,
|
|
661
|
+
killSession,
|
|
662
|
+
sendInput,
|
|
663
|
+
getSnapshot,
|
|
664
|
+
shutdown,
|
|
665
|
+
on: emitter.on.bind(emitter),
|
|
666
|
+
off: emitter.off.bind(emitter),
|
|
667
|
+
get sessionCount() { return sessions.size; },
|
|
668
|
+
get isShuttingDown() { return shuttingDown; },
|
|
669
|
+
get eventLogPath() { return eventLog.filePath; },
|
|
670
|
+
});
|
|
671
|
+
}
|