@love-moon/conductor-cli 0.2.16 → 0.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/conductor-config.js +14 -14
- package/bin/conductor-fire.js +133 -10
- package/bin/conductor-send-file.js +290 -0
- package/bin/conductor.js +5 -1
- package/package.json +5 -5
- package/src/daemon.js +389 -2
- package/src/log-collector.js +181 -0
- package/src/fire/history.js +0 -614
package/src/daemon.js
CHANGED
|
@@ -8,6 +8,7 @@ import dotenv from "dotenv";
|
|
|
8
8
|
import yaml from "js-yaml";
|
|
9
9
|
|
|
10
10
|
import { ConductorWebSocketClient, ConductorConfig, loadConfig, ConfigFileNotFound } from "@love-moon/conductor-sdk";
|
|
11
|
+
import { DaemonLogCollector } from "./log-collector.js";
|
|
11
12
|
|
|
12
13
|
dotenv.config();
|
|
13
14
|
|
|
@@ -212,6 +213,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
212
213
|
const createWebSocketClient =
|
|
213
214
|
deps.createWebSocketClient ||
|
|
214
215
|
((clientConfig, options) => new ConductorWebSocketClient(clientConfig, options));
|
|
216
|
+
const createLogCollector = deps.createLogCollector || ((backendUrl) => new DaemonLogCollector(backendUrl));
|
|
215
217
|
const PROJECT_PATH_LOOKUP_TIMEOUT_MS = parsePositiveInt(
|
|
216
218
|
process.env.CONDUCTOR_PROJECT_PATH_LOOKUP_TIMEOUT_MS,
|
|
217
219
|
1500,
|
|
@@ -228,6 +230,30 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
228
230
|
process.env.CONDUCTOR_SHUTDOWN_DISCONNECT_TIMEOUT_MS,
|
|
229
231
|
1000,
|
|
230
232
|
);
|
|
233
|
+
const DAEMON_WATCHDOG_INTERVAL_MS = parsePositiveInt(
|
|
234
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_INTERVAL_MS,
|
|
235
|
+
30_000,
|
|
236
|
+
);
|
|
237
|
+
const DAEMON_WATCHDOG_STALE_WS_MS = parsePositiveInt(
|
|
238
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_STALE_WS_MS,
|
|
239
|
+
75_000,
|
|
240
|
+
);
|
|
241
|
+
const DAEMON_WATCHDOG_CONNECT_GRACE_MS = parsePositiveInt(
|
|
242
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_CONNECT_GRACE_MS,
|
|
243
|
+
35_000,
|
|
244
|
+
);
|
|
245
|
+
const DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS = parsePositiveInt(
|
|
246
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS,
|
|
247
|
+
45_000,
|
|
248
|
+
);
|
|
249
|
+
const DAEMON_WATCHDOG_HTTP_TIMEOUT_MS = parsePositiveInt(
|
|
250
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_HTTP_TIMEOUT_MS,
|
|
251
|
+
5_000,
|
|
252
|
+
);
|
|
253
|
+
const DAEMON_WATCHDOG_MAX_SELF_HEALS = parsePositiveInt(
|
|
254
|
+
process.env.CONDUCTOR_DAEMON_WATCHDOG_MAX_SELF_HEALS,
|
|
255
|
+
3,
|
|
256
|
+
);
|
|
231
257
|
|
|
232
258
|
try {
|
|
233
259
|
mkdirSyncFn(WORKSPACE_ROOT, { recursive: true });
|
|
@@ -361,18 +387,42 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
361
387
|
|
|
362
388
|
let disconnectedSinceLastConnectedLog = false;
|
|
363
389
|
let didRecoverStaleTasks = false;
|
|
390
|
+
let daemonShuttingDown = false;
|
|
364
391
|
const activeTaskProcesses = new Map();
|
|
365
392
|
const suppressedExitStatusReports = new Set();
|
|
366
393
|
const seenCommandRequestIds = new Set();
|
|
394
|
+
let lastConnectedAt = null;
|
|
395
|
+
let lastPongAt = null;
|
|
396
|
+
let lastInboundAt = null;
|
|
397
|
+
let lastSuccessfulHttpAt = null;
|
|
398
|
+
let lastPresenceCheckAt = null;
|
|
399
|
+
let lastPresenceConfirmedAt = null;
|
|
400
|
+
let wsConnected = false;
|
|
401
|
+
let watchdogLastHealAt = 0;
|
|
402
|
+
let watchdogHealAttempts = 0;
|
|
403
|
+
let watchdogProbeInFlight = false;
|
|
404
|
+
let watchdogLastProbeErrorAt = 0;
|
|
405
|
+
let watchdogLastPresenceMismatchAt = 0;
|
|
406
|
+
let watchdogAwaitingHealthySignalAt = null;
|
|
407
|
+
let watchdogTimer = null;
|
|
408
|
+
const logCollector = createLogCollector(BACKEND_HTTP);
|
|
367
409
|
const client = createWebSocketClient(sdkConfig, {
|
|
368
410
|
extraHeaders: {
|
|
369
411
|
"x-conductor-host": AGENT_NAME,
|
|
370
412
|
"x-conductor-backends": SUPPORTED_BACKENDS.join(","),
|
|
371
413
|
},
|
|
372
|
-
onConnected: ({ isReconnect } = { isReconnect: false }) => {
|
|
414
|
+
onConnected: ({ isReconnect, connectedAt } = { isReconnect: false, connectedAt: Date.now() }) => {
|
|
415
|
+
wsConnected = true;
|
|
416
|
+
lastConnectedAt = connectedAt || Date.now();
|
|
417
|
+
lastPongAt = lastPongAt && lastPongAt > lastConnectedAt ? lastPongAt : lastConnectedAt;
|
|
373
418
|
if (!isReconnect || disconnectedSinceLastConnectedLog) {
|
|
374
419
|
log("Connected to backend");
|
|
375
420
|
}
|
|
421
|
+
if (watchdogHealAttempts > 0) {
|
|
422
|
+
watchdogAwaitingHealthySignalAt = lastConnectedAt;
|
|
423
|
+
} else {
|
|
424
|
+
watchdogAwaitingHealthySignalAt = null;
|
|
425
|
+
}
|
|
376
426
|
disconnectedSinceLastConnectedLog = false;
|
|
377
427
|
sendAgentResume(isReconnect).catch((error) => {
|
|
378
428
|
logError(`sendAgentResume failed: ${error?.message || error}`);
|
|
@@ -388,8 +438,24 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
388
438
|
});
|
|
389
439
|
}
|
|
390
440
|
},
|
|
391
|
-
onDisconnected: () => {
|
|
441
|
+
onDisconnected: (event = {}) => {
|
|
442
|
+
wsConnected = false;
|
|
392
443
|
disconnectedSinceLastConnectedLog = true;
|
|
444
|
+
if (!daemonShuttingDown) {
|
|
445
|
+
logError(
|
|
446
|
+
`[daemon-ws] Disconnected from backend: ${formatDisconnectDiagnostics(event)} (${formatDaemonHealthState({
|
|
447
|
+
connectedAt: lastConnectedAt,
|
|
448
|
+
lastPongAt,
|
|
449
|
+
lastInboundAt,
|
|
450
|
+
lastSuccessfulHttpAt,
|
|
451
|
+
lastPresenceConfirmedAt,
|
|
452
|
+
})})`,
|
|
453
|
+
);
|
|
454
|
+
}
|
|
455
|
+
},
|
|
456
|
+
onPong: ({ at }) => {
|
|
457
|
+
lastPongAt = at;
|
|
458
|
+
markWatchdogHealthy("pong", at);
|
|
393
459
|
},
|
|
394
460
|
});
|
|
395
461
|
|
|
@@ -401,6 +467,165 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
401
467
|
logError(`Failed to connect: ${err}`);
|
|
402
468
|
});
|
|
403
469
|
|
|
470
|
+
watchdogTimer = setInterval(() => {
|
|
471
|
+
void runDaemonWatchdog();
|
|
472
|
+
}, DAEMON_WATCHDOG_INTERVAL_MS);
|
|
473
|
+
if (typeof watchdogTimer?.unref === "function") {
|
|
474
|
+
watchdogTimer.unref();
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
/**
 * Records the moment a backend HTTP request last succeeded.
 *
 * The stored value is only surfaced through the daemon health-state
 * diagnostics that are attached to watchdog and disconnect log lines.
 *
 * @param {number} [at=Date.now()] - Epoch milliseconds of the successful response.
 */
function markBackendHttpSuccess(at = Date.now()) {
  lastSuccessfulHttpAt = at;
}
|
|
480
|
+
|
|
481
|
+
/**
 * Queries `GET {BACKEND_HTTP}/api/agents` and reports whether an entry whose
 * trimmed `host` equals this daemon's AGENT_NAME is present.
 *
 * Side effects: always stamps `lastPresenceCheckAt`; on an OK response marks
 * the HTTP channel as healthy; when this host is listed, also stamps
 * `lastPresenceConfirmedAt`.
 *
 * Never rejects: timeouts, network errors and JSON parse failures are folded
 * into a failure result.
 *
 * @returns {Promise<{ok: true, selfOnline: boolean, agentCount: number} |
 *   {ok: false, status: number|null, error: string}>}
 */
async function probeAgentPresence() {
  lastPresenceCheckAt = Date.now();
  const failure = (status, error) => ({ ok: false, status, error });

  try {
    const pendingRequest = fetchFn(`${BACKEND_HTTP}/api/agents`, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${AGENT_TOKEN}`,
        Accept: "application/json",
      },
    });
    const response = await withTimeout(
      pendingRequest,
      DAEMON_WATCHDOG_HTTP_TIMEOUT_MS,
      "daemon agent presence probe",
    );
    if (!response.ok) {
      return failure(response.status, `HTTP ${response.status}`);
    }

    // The HTTP round trip itself succeeded, even if the body later fails to parse.
    const respondedAt = Date.now();
    markBackendHttpSuccess(respondedAt);

    const body = await response.json();
    // A non-array payload is treated as "no agents listed".
    const listedAgents = Array.isArray(body) ? body : [];

    let selfOnline = false;
    for (const agent of listedAgents) {
      if (String(agent?.host || "").trim() === AGENT_NAME) {
        selfOnline = true;
        break;
      }
    }
    if (selfOnline) {
      lastPresenceConfirmedAt = respondedAt;
    }

    return {
      ok: true,
      selfOnline,
      agentCount: listedAgents.length,
    };
  } catch (error) {
    return failure(null, error?.message || String(error));
  }
}
|
|
523
|
+
|
|
524
|
+
/**
 * Asks the daemon to recover a websocket the watchdog considers unhealthy.
 *
 * Does nothing while shutting down, while the socket is already marked
 * disconnected, or while the reconnect cooldown since the previous heal is
 * still running. Otherwise it counts the attempt and logs it, then either:
 *  - exits the process (via requestShutdown, cleanupLock and exitFn(1)) once
 *    the attempt count exceeds DAEMON_WATCHDOG_MAX_SELF_HEALS, or
 *  - forces a reconnect, preferring `client.forceReconnect` when the client
 *    provides it and falling back to disconnect-then-connect.
 *
 * Client calls are wrapped so that a synchronous throw or a non-promise return
 * value is logged instead of escaping into the watchdog timer as an unhandled
 * rejection.
 *
 * @param {string} reason - Short machine-readable cause, e.g. "stale_ws_health".
 * @param {object} [extra={}] - Diagnostics rendered by formatWatchdogExtra.
 */
function requestWatchdogSelfHeal(reason, extra = {}) {
  if (daemonShuttingDown || !wsConnected) {
    return;
  }
  const now = Date.now();
  if (watchdogLastHealAt && now - watchdogLastHealAt < DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS) {
    return;
  }
  watchdogLastHealAt = now;
  watchdogHealAttempts += 1;
  logError(
    `[watchdog] ${reason}; restarting daemon websocket (${watchdogHealAttempts}/${DAEMON_WATCHDOG_MAX_SELF_HEALS}) ${formatWatchdogExtra(extra)} (${formatDaemonHealthState({
      connectedAt: lastConnectedAt,
      lastPongAt,
      lastInboundAt,
      lastSuccessfulHttpAt,
      lastPresenceConfirmedAt,
    })})`,
  );
  if (watchdogHealAttempts > DAEMON_WATCHDOG_MAX_SELF_HEALS) {
    // Flag shutdown first so the disconnect handler does not log this as an unexpected drop.
    daemonShuttingDown = true;
    logError("[watchdog] Self-heal budget exceeded; exiting daemon for supervisor restart");
    void requestShutdown("watchdog self-heal budget exceeded")
      .catch((error) => {
        logError(`watchdog shutdown failed: ${error?.message || error}`);
      })
      .finally(() => {
        cleanupLock();
        exitFn(1);
      });
    return;
  }
  watchdogAwaitingHealthySignalAt = null;
  wsConnected = false;
  disconnectedSinceLastConnectedLog = true;

  // The Promise executor runs synchronously, so the client call still happens
  // right away, but a sync throw or a non-promise return becomes a settled promise.
  const invokeSafely = (call) =>
    new Promise((resolve) => {
      resolve(call());
    });

  if (typeof client.forceReconnect === "function") {
    invokeSafely(() => client.forceReconnect(`watchdog:${reason}`)).catch((error) => {
      logError(`watchdog forceReconnect failed: ${error?.message || error}`);
    });
    return;
  }
  invokeSafely(() => client.disconnect())
    .catch((error) => {
      logError(`watchdog disconnect failed: ${error?.message || error}`);
    })
    .finally(() => {
      // Reconnect whether or not the disconnect succeeded.
      invokeSafely(() => client.connect()).catch((error) => {
        logError(`watchdog reconnect failed: ${error?.message || error}`);
      });
    });
}
|
|
575
|
+
|
|
576
|
+
/**
 * One watchdog tick: probes backend agent presence and requests a websocket
 * self-heal when no websocket health signal (pong, inbound event or connect)
 * has been seen for longer than DAEMON_WATCHDOG_STALE_WS_MS.
 *
 * The tick is skipped while shutting down, while disconnected, while a
 * previous probe is still in flight, or during the post-connect grace period.
 * A presence probe that omits this host is only logged; it never triggers a
 * self-heal on its own.
 *
 * The function never rejects. It is started from a timer with `void`, so any
 * unexpected error is logged here rather than surfacing as an unhandled
 * promise rejection.
 *
 * @returns {Promise<void>}
 */
async function runDaemonWatchdog() {
  if (daemonShuttingDown || !wsConnected || watchdogProbeInFlight) {
    return;
  }
  const startedAt = Date.now();
  if (!lastConnectedAt || startedAt - lastConnectedAt < DAEMON_WATCHDOG_CONNECT_GRACE_MS) {
    return;
  }
  watchdogProbeInFlight = true;
  try {
    const probe = await probeAgentPresence();
    const now = Date.now();
    const lastWsHealthAt = Math.max(lastPongAt || 0, lastInboundAt || 0, lastConnectedAt || 0);
    const staleWs = !lastWsHealthAt || now - lastWsHealthAt > DAEMON_WATCHDOG_STALE_WS_MS;

    if (!probe.ok) {
      // Rate-limit probe failure logs with the same window as reconnect attempts.
      if (now - watchdogLastProbeErrorAt >= DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS) {
        watchdogLastProbeErrorAt = now;
        logError(`[watchdog] agent presence probe failed: ${probe.error}`);
      }
      if (staleWs) {
        requestWatchdogSelfHeal("stale_ws_health", {
          probeAt: lastPresenceCheckAt,
          probeStatus: probe.status,
          probeError: probe.error,
          lastWsHealthAt,
          staleForMs: now - lastWsHealthAt,
        });
      }
      return;
    }

    if (!probe.selfOnline && now - watchdogLastPresenceMismatchAt >= DAEMON_WATCHDOG_RECONNECT_COOLDOWN_MS) {
      watchdogLastPresenceMismatchAt = now;
      logError(`[watchdog] agent presence probe did not include current host; skipping self-heal to avoid false positives on non-sticky HTTP/WS deployments (${formatWatchdogExtra({
        agentCount: probe.agentCount,
        probeAt: lastPresenceCheckAt,
      })})`);
    }

    if (staleWs) {
      requestWatchdogSelfHeal("stale_ws_health", {
        agentCount: probe.agentCount,
        lastWsHealthAt,
        staleForMs: now - lastWsHealthAt,
        probeAt: lastPresenceCheckAt,
      });
    }
  } catch (error) {
    // Keep the watchdog alive: a failed tick must not take the process down.
    logError(`[watchdog] health check failed unexpectedly: ${error?.message || error}`);
  } finally {
    watchdogProbeInFlight = false;
  }
}
|
|
628
|
+
|
|
404
629
|
async function recoverStaleTasks() {
|
|
405
630
|
try {
|
|
406
631
|
const response = await fetchFn(`${BACKEND_HTTP}/api/tasks`, {
|
|
@@ -414,6 +639,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
414
639
|
logError(`Failed to recover stale tasks: HTTP ${response.status}`);
|
|
415
640
|
return;
|
|
416
641
|
}
|
|
642
|
+
markBackendHttpSuccess();
|
|
417
643
|
|
|
418
644
|
const tasks = await response.json();
|
|
419
645
|
if (!Array.isArray(tasks) || tasks.length === 0) {
|
|
@@ -445,6 +671,8 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
445
671
|
});
|
|
446
672
|
if (!patchResp.ok) {
|
|
447
673
|
logError(`Failed to mark stale task ${taskId} as killed: HTTP ${patchResp.status}`);
|
|
674
|
+
} else {
|
|
675
|
+
markBackendHttpSuccess();
|
|
448
676
|
}
|
|
449
677
|
}),
|
|
450
678
|
);
|
|
@@ -468,6 +696,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
468
696
|
logError(`Failed to reconcile tasks: HTTP ${response.status}`);
|
|
469
697
|
return;
|
|
470
698
|
}
|
|
699
|
+
markBackendHttpSuccess();
|
|
471
700
|
const tasks = await response.json();
|
|
472
701
|
if (!Array.isArray(tasks)) {
|
|
473
702
|
return;
|
|
@@ -497,6 +726,7 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
497
726
|
});
|
|
498
727
|
if (patchResp.ok) {
|
|
499
728
|
killedCount += 1;
|
|
729
|
+
markBackendHttpSuccess();
|
|
500
730
|
} else {
|
|
501
731
|
logError(`Failed to reconcile stale task ${taskId}: HTTP ${patchResp.status}`);
|
|
502
732
|
}
|
|
@@ -552,6 +782,9 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
552
782
|
}
|
|
553
783
|
|
|
554
784
|
function handleEvent(event) {
|
|
785
|
+
const receivedAt = Date.now();
|
|
786
|
+
lastInboundAt = receivedAt;
|
|
787
|
+
markWatchdogHealthy("inbound", receivedAt);
|
|
555
788
|
if (event.type === "error") {
|
|
556
789
|
const payload = event?.payload && typeof event.payload === "object" ? event.payload : {};
|
|
557
790
|
const planLimitMessage = getPlanLimitMessage(payload);
|
|
@@ -574,6 +807,79 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
574
807
|
}
|
|
575
808
|
if (event.type === "stop_task") {
|
|
576
809
|
handleStopTask(event.payload);
|
|
810
|
+
return;
|
|
811
|
+
}
|
|
812
|
+
if (event.type === "collect_logs") {
|
|
813
|
+
void handleCollectLogs(event.payload);
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
/**
 * Closes out a pending watchdog self-heal once a fresh websocket health signal
 * arrives.
 *
 * Only acts when a self-heal is outstanding (heal attempts recorded and a
 * reconnect timestamp awaiting confirmation) and the signal is not older than
 * that reconnect. In that case it logs the recovery and resets both the
 * pending marker and the heal attempt counter.
 *
 * @param {string} signal - Label of the signal source, used in the log line.
 * @param {number} [at=Date.now()] - Epoch milliseconds at which the signal was observed.
 */
function markWatchdogHealthy(signal, at = Date.now()) {
  const awaitingSince = watchdogAwaitingHealthySignalAt;
  const healPending = Boolean(awaitingSince) && watchdogHealAttempts !== 0;
  if (!healPending) {
    return;
  }
  // Signals that predate the reconnect say nothing about the new socket.
  if (at < awaitingSince) {
    return;
  }

  const healthSnapshot = formatDaemonHealthState({
    connectedAt: lastConnectedAt,
    lastPongAt,
    lastInboundAt,
    lastSuccessfulHttpAt,
    lastPresenceConfirmedAt,
  });
  log(`[watchdog] Backend websocket healthy again after self-heal via ${signal} (${healthSnapshot})`);

  watchdogAwaitingHealthySignalAt = null;
  watchdogHealAttempts = 0;
}
|
|
836
|
+
|
|
837
|
+
/**
 * Serves a `collect_logs` request: gathers log entries for the requested task
 * through the log collector and reports them back over the websocket as an
 * `agent_log_collected` message.
 *
 * A payload without both `request_id` and `task_id` is logged and dropped.
 * A collector failure is converted into an empty result carrying an `error`
 * string, so the requester still gets an answer. A failure to send the report
 * is logged and otherwise ignored.
 *
 * @param {object} payload - Event payload; reads `request_id`, `task_id`,
 *   `options.tail_lines` and `options.since`.
 * @returns {Promise<void>}
 */
async function handleCollectLogs(payload) {
  const asTrimmedId = (raw) => (raw ? String(raw).trim() : "");
  const requestId = asTrimmedId(payload?.request_id);
  const taskId = asTrimmedId(payload?.task_id);
  const collectedAt = new Date().toISOString();

  if (!(requestId && taskId)) {
    logError(`Invalid collect_logs payload: ${JSON.stringify(payload)}`);
    return;
  }

  let result;
  try {
    const collectOptions = {
      tailLines: payload?.options?.tail_lines,
      since: payload?.options?.since,
    };
    // Promise.resolve lets the collector be either synchronous or asynchronous.
    result = await Promise.resolve(logCollector.collect(taskId, collectOptions));
  } catch (error) {
    result = {
      projectPath: null,
      logPath: null,
      entries: [],
      truncated: false,
      error: `Failed to read log file: ${error?.message || error}`,
      collectedAt,
    };
  }

  try {
    // The message is built inside the try so a malformed collector result is
    // reported through the same error path as a failed send.
    const report = {
      type: "agent_log_collected",
      payload: {
        request_id: requestId,
        task_id: taskId,
        daemon_host: AGENT_NAME,
        project_path: result.projectPath,
        log_path: result.logPath,
        logs: result.entries,
        truncated: Boolean(result.truncated),
        error: result.error,
        collected_at: result.collectedAt || collectedAt,
      },
    };
    await client.sendJson(report);
  } catch (error) {
    logError(`Failed to report agent_log_collected for ${taskId}: ${error?.message || error}`);
  }
}
|
|
579
885
|
|
|
@@ -1004,6 +1310,11 @@ export function startDaemon(config = {}, deps = {}) {
|
|
|
1004
1310
|
}
|
|
1005
1311
|
|
|
1006
1312
|
closePromise = (async () => {
|
|
1313
|
+
daemonShuttingDown = true;
|
|
1314
|
+
if (watchdogTimer) {
|
|
1315
|
+
clearInterval(watchdogTimer);
|
|
1316
|
+
watchdogTimer = null;
|
|
1317
|
+
}
|
|
1007
1318
|
const activeEntries = [...activeTaskProcesses.entries()];
|
|
1008
1319
|
if (activeEntries.length > 0) {
|
|
1009
1320
|
log(`Shutdown requested (${reason}); stopping ${activeEntries.length} active task(s)`);
|
|
@@ -1118,6 +1429,82 @@ function parsePositiveInt(value, fallback) {
|
|
|
1118
1429
|
return fallback;
|
|
1119
1430
|
}
|
|
1120
1431
|
|
|
1432
|
+
/**
 * Renders a websocket disconnect event as a single line of space-separated
 * `key=value` pairs for logging.
 *
 * `reason` is always present ("unknown" when missing or blank). Every other
 * field is included only when the event carries a usable value: finite numbers
 * for codes, counters and timestamps, non-blank strings for text fields.
 * `missed_pongs` is additionally omitted when it is zero or negative.
 *
 * @param {object} [event] - Disconnect details supplied by the websocket client.
 * @returns {string}
 */
function formatDisconnectDiagnostics(event) {
  const trimmedText = (raw) => (typeof raw === "string" ? raw.trim() : "");

  const fields = [`reason=${trimmedText(event?.reason) || "unknown"}`];

  if (Number.isFinite(event?.closeCode)) {
    fields.push(`close_code=${event.closeCode}`);
  }

  const textFields = [
    ["close_reason", "closeReason"],
    ["socket_error", "socketError"],
  ];
  for (const [label, key] of textFields) {
    const text = trimmedText(event?.[key]);
    if (text) {
      fields.push(`${label}=${text}`);
    }
  }

  if (Number.isFinite(event?.missedPongs) && event.missedPongs > 0) {
    fields.push(`missed_pongs=${event.missedPongs}`);
  }

  const timestampFields = [
    ["last_ping_at", "lastPingAt"],
    ["last_pong_at", "lastPongAt"],
    ["last_message_at", "lastMessageAt"],
  ];
  for (const [label, key] of timestampFields) {
    if (Number.isFinite(event?.[key])) {
      fields.push(`${label}=${formatIsoTimestamp(event[key])}`);
    }
  }

  return fields.join(" ");
}
|
|
1461
|
+
|
|
1462
|
+
/**
 * Renders the daemon's connectivity timestamps as one line of space-separated
 * `key=value` pairs for logging. Each value is formatted by
 * formatIsoTimestamp, so a timestamp that was never set shows up as "never".
 *
 * @param {object} state
 * @param {number|null} state.connectedAt
 * @param {number|null} state.lastPongAt
 * @param {number|null} state.lastInboundAt
 * @param {number|null} state.lastSuccessfulHttpAt
 * @param {number|null} state.lastPresenceConfirmedAt
 * @returns {string}
 */
function formatDaemonHealthState({
  connectedAt,
  lastPongAt,
  lastInboundAt,
  lastSuccessfulHttpAt,
  lastPresenceConfirmedAt,
}) {
  const labelled = [
    ["connected_at", connectedAt],
    ["last_pong_at", lastPongAt],
    ["last_inbound_at", lastInboundAt],
    ["last_http_ok_at", lastSuccessfulHttpAt],
    ["last_presence_at", lastPresenceConfirmedAt],
  ];
  return labelled.map(([label, at]) => `${label}=${formatIsoTimestamp(at)}`).join(" ");
}
|
|
1477
|
+
|
|
1478
|
+
/**
 * Renders optional watchdog diagnostics as space-separated `key=value` pairs.
 *
 * Numeric fields are emitted only when finite, and `probeError` only when it
 * is a non-blank string. When nothing qualifies, the placeholder
 * "no-extra-diagnostics" is returned so the surrounding log line keeps its shape.
 *
 * @param {object} [extra] - May carry agentCount, probeStatus, probeAt,
 *   probeError, lastWsHealthAt and staleForMs.
 * @returns {string}
 */
function formatWatchdogExtra(extra) {
  const segments = [];
  const addNumeric = (label, value, render = (n) => n) => {
    if (Number.isFinite(value)) {
      segments.push(`${label}=${render(value)}`);
    }
  };

  addNumeric("agent_count", extra?.agentCount);
  addNumeric("probe_status", extra?.probeStatus);
  addNumeric("probe_at", extra?.probeAt, formatIsoTimestamp);

  const probeError = typeof extra?.probeError === "string" ? extra.probeError.trim() : "";
  if (probeError) {
    segments.push(`probe_error=${probeError}`);
  }

  addNumeric("last_ws_health_at", extra?.lastWsHealthAt, formatIsoTimestamp);
  addNumeric("stale_for_ms", extra?.staleForMs);

  return segments.length > 0 ? segments.join(" ") : "no-extra-diagnostics";
}
|
|
1500
|
+
|
|
1501
|
+
/**
 * Formats an epoch-millisecond value as an ISO-8601 string for log output.
 *
 * This helper is used inside logging paths, so it must never throw.
 *
 * @param {*} value - Expected to be epoch milliseconds.
 * @returns {string} The ISO timestamp; "never" when `value` is not a finite
 *   number (e.g. null for a timestamp that was never set); "invalid" when the
 *   number is finite but outside the range a Date can represent.
 */
function formatIsoTimestamp(value) {
  if (!Number.isFinite(value)) {
    return "never";
  }
  const date = new Date(value);
  // Finite values beyond the Date range yield an Invalid Date, and
  // toISOString() would throw RangeError on it.
  if (Number.isNaN(date.getTime())) {
    return "invalid";
  }
  return date.toISOString();
}
|
|
1507
|
+
|
|
1121
1508
|
async function withTimeout(promise, timeoutMs, label) {
|
|
1122
1509
|
let timer = null;
|
|
1123
1510
|
const timeoutPromise = new Promise((_, reject) => {
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
import { SessionDiskStore } from "@love-moon/conductor-sdk";
|
|
5
|
+
|
|
6
|
+
const DEFAULT_TAIL_LINES = 200;
|
|
7
|
+
const MAX_TAIL_LINES = 500;
|
|
8
|
+
const DEFAULT_TAIL_BYTES = 256 * 1024;
|
|
9
|
+
const MAX_TAIL_BYTES = 1024 * 1024;
|
|
10
|
+
|
|
11
|
+
/**
 * Coerces `value` to a positive integer no larger than `max`.
 *
 * Numbers are truncated directly instead of being round-tripped through a
 * string. Very large or very small numbers stringify in exponent notation
 * ("1e+21", "5e-7"), which parseInt would read as just the leading digit.
 * Every other input keeps the lenient parseInt handling (e.g. "12abc" -> 12).
 *
 * @param {*} value - Raw option value (number, numeric string, null, ...).
 * @param {number} fallback - Returned when `value` is missing, not numeric,
 *   not finite, or not greater than zero after truncation.
 * @param {number} max - Upper bound applied to a valid value.
 * @returns {number}
 */
function clampPositiveInt(value, fallback, max) {
  const parsed =
    typeof value === "number" ? Math.trunc(value) : Number.parseInt(String(value ?? ""), 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }
  return Math.min(parsed, max);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Converts a `since` option into epoch milliseconds.
 *
 * @param {*} value - Expected to be a date string that Date.parse understands.
 * @returns {number|null} The parsed timestamp, or null when `value` is not a
 *   string, is blank, or cannot be parsed as a date.
 */
function normalizeSince(value) {
  const isUsableString = typeof value === "string" && value.trim().length > 0;
  if (!isUsableString) {
    return null;
  }
  const parsedMs = Date.parse(value);
  if (Number.isFinite(parsedMs)) {
    return parsedMs;
  }
  return null;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Guesses a log level from the text of a message that carries no explicit
 * level, using case-insensitive substring matches.
 *
 * "error" or "failed" map to ERROR and take precedence over "warn", which
 * maps to WARN. Anything else, including an empty or missing message, is INFO.
 *
 * @param {*} message - Log message text.
 * @returns {"ERROR"|"WARN"|"INFO"}
 */
function inferLevel(message) {
  const haystack = String(message || "").toLowerCase();
  // Order encodes precedence: the first matching keyword wins.
  const keywordLevels = [
    ["error", "ERROR"],
    ["failed", "ERROR"],
    ["warn", "WARN"],
  ];
  const hit = keywordLevels.find(([keyword]) => haystack.includes(keyword));
  return hit ? hit[1] : "INFO";
}
|
|
37
|
+
|
|
38
|
+
/**
 * Parses one raw log line into a structured entry.
 *
 * Two bracketed layouts are recognised, tried in this order:
 *  1. `[<timestamp>] [<LEVEL>] <message>`: the level is taken from the line.
 *  2. `[<source> <YYYY-MM-DDTHH:MM:SS>] <message>`: the level is inferred
 *     from the message and the source tag is kept as a message prefix.
 * A layout whose timestamp cannot be parsed is treated as not matching. Lines
 * matching neither layout are returned whole with a null timestamp and an
 * inferred level.
 *
 * @param {*} line - Raw line text.
 * @returns {{timestamp: string|null, level: string, message: string}|null}
 *   The entry, or null when the line is empty or whitespace only.
 */
function parseLogLine(line) {
  const content = String(line || "").trim();
  if (content === "") {
    return null;
  }

  const toIso = (ms) => new Date(ms).toISOString();

  const levelled = /^\[([^\]]+)\]\s+\[([A-Z]+)\]\s+(.*)$/.exec(content);
  if (levelled) {
    const [, rawTimestamp, level, message] = levelled;
    const parsedMs = Date.parse(rawTimestamp);
    if (Number.isFinite(parsedMs)) {
      return { timestamp: toIso(parsedMs), level, message };
    }
  }

  const tagged = /^\[([^\]]+?)\s+(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\]\s+(.*)$/.exec(content);
  if (tagged) {
    const [, source, rawTimestamp, message] = tagged;
    const parsedMs = Date.parse(rawTimestamp);
    if (Number.isFinite(parsedMs)) {
      return {
        timestamp: toIso(parsedMs),
        level: inferLevel(message),
        message: `[${source}] ${message}`,
      };
    }
  }

  // Unstructured line (for example a stack-trace continuation).
  return {
    timestamp: null,
    level: inferLevel(content),
    message: content,
  };
}
|
|
74
|
+
|
|
75
|
+
/**
 * Reads at most the last `maxBytes` bytes of a file and decodes them as UTF-8.
 *
 * When the read starts part-way through the file, everything up to and
 * including the first newline is discarded so the result never begins with a
 * partial line (or a partial multi-byte character).
 *
 * The size is taken from the already-open descriptor, so size and content
 * always refer to the same file. Only the bytes actually read are decoded, so
 * a file that shrinks mid-read cannot leave NUL padding in the text.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {number} maxBytes - Maximum number of trailing bytes to read.
 * @returns {{text: string, truncatedByBytes: boolean}} `truncatedByBytes` is
 *   true when the start of the file was skipped.
 * @throws {Error} Filesystem errors from opening, inspecting or reading the file.
 */
function readTailText(filePath, maxBytes) {
  const fd = fs.openSync(filePath, "r");
  let buffer;
  let start = 0;
  let bytesRead = 0;
  try {
    const { size } = fs.fstatSync(fd);
    const bytesToRead = Math.min(size, maxBytes);
    start = Math.max(0, size - bytesToRead);
    buffer = Buffer.alloc(bytesToRead);
    // readSync may return fewer bytes than requested; keep going until the
    // window is filled or end of file is reached.
    while (bytesRead < bytesToRead) {
      const chunk = fs.readSync(fd, buffer, bytesRead, bytesToRead - bytesRead, start + bytesRead);
      if (chunk === 0) {
        break;
      }
      bytesRead += chunk;
    }
  } finally {
    fs.closeSync(fd);
  }

  let text = buffer.toString("utf8", 0, bytesRead);
  if (start > 0) {
    const firstNewline = text.indexOf("\n");
    text = firstNewline >= 0 ? text.slice(firstNewline + 1) : "";
  }
  return {
    text,
    truncatedByBytes: start > 0,
  };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Collects the tail of a task's `conductor.log` so the daemon can ship it to
 * the backend on request.
 *
 * The task's project directory is looked up in a session store, and the log is
 * expected at `<projectPath>/conductor.log`.
 */
export class DaemonLogCollector {
  /**
   * @param {string} backendUrl - Used to obtain the default session store.
   * @param {object} [options] - Overrides for the collaborators.
   * @param {object} [options.sessionStore] - Must provide `findByTaskId(taskId)`.
   * @param {Function} [options.readTailText] - `(filePath, maxBytes) => ({ text, truncatedByBytes })`.
   * @param {Function} [options.existsSync] - `(filePath) => boolean`.
   */
  constructor(backendUrl, options = {}) {
    this.sessionStore = options.sessionStore || SessionDiskStore.forBackendUrl(backendUrl);
    this.readTailText = options.readTailText || readTailText;
    this.existsSync = options.existsSync || fs.existsSync;
  }

  /**
   * Reads and parses the most recent log entries for a task.
   *
   * Expected failures (blank task id, unknown task, missing log file) are
   * reported through the `error` field of the result, not by throwing.
   *
   * @param {string} taskId - Task whose log should be collected.
   * @param {object} [options]
   * @param {*} [options.tailLines] - Maximum entries to return (default 200, capped at 500).
   * @param {*} [options.maxBytes] - Maximum trailing bytes to read (default 256 KiB, capped at 1 MiB).
   * @param {*} [options.since] - Date string; only entries at or after it are kept.
   * @returns {{projectPath: string|null, logPath: string|null, entries: object[],
   *   truncated: boolean, error: string|null, collectedAt: string}}
   */
  collect(taskId, options = {}) {
    const collectedAt = new Date().toISOString();
    const failure = (error, projectPath = null, logPath = null) => ({
      projectPath,
      logPath,
      entries: [],
      truncated: false,
      error,
      collectedAt,
    });

    const normalizedTaskId = String(taskId || "").trim();
    if (normalizedTaskId === "") {
      return failure("task_id is required");
    }

    const record = this.sessionStore.findByTaskId(normalizedTaskId);
    if (!record) {
      return failure("Task not found in session store");
    }

    const projectPath = path.resolve(record.projectPath);
    const logPath = path.join(projectPath, "conductor.log");
    if (!this.existsSync(logPath)) {
      return failure("Log file not found", projectPath, logPath);
    }

    const tailLines = clampPositiveInt(options.tailLines, DEFAULT_TAIL_LINES, MAX_TAIL_LINES);
    const maxBytes = clampPositiveInt(options.maxBytes, DEFAULT_TAIL_BYTES, MAX_TAIL_BYTES);
    const sinceMs = normalizeSince(options.since);
    const { text, truncatedByBytes } = this.readTailText(logPath, maxBytes);

    // Lines without their own timestamp inherit the most recent one seen, so
    // continuation lines are filtered together with the entry they follow.
    let lastSeenTimestampMs = null;
    const matched = [];
    text.split(/\r?\n/).forEach((rawLine) => {
      const entry = parseLogLine(rawLine);
      if (!entry) {
        return;
      }
      const hasOwnTimestamp = typeof entry.timestamp === "string";
      const effectiveMs = hasOwnTimestamp ? Date.parse(entry.timestamp) : lastSeenTimestampMs;
      if (hasOwnTimestamp && Number.isFinite(effectiveMs)) {
        lastSeenTimestampMs = effectiveMs;
      }
      if (sinceMs !== null) {
        const isRecentEnough = Number.isFinite(effectiveMs) && effectiveMs >= sinceMs;
        if (!isRecentEnough) {
          return;
        }
      }
      matched.push(entry);
    });

    const entries = matched.slice(-tailLines);
    return {
      projectPath,
      logPath,
      entries,
      truncated: truncatedByBytes || matched.length > entries.length,
      error: null,
      collectedAt,
    };
  }
}
|