@love-moon/conductor-cli 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,366 @@
1
+ #!/usr/bin/env node
2
+
3
+ import process from "node:process";
4
+ import yargs from "yargs/yargs";
5
+ import { hideBin } from "yargs/helpers";
6
+
7
+ import { loadConfig } from "@love-moon/conductor-sdk";
8
+
9
// Name shown in usage/help output; overridable through CONDUCTOR_CLI_NAME.
const CLI_NAME = process.env.CONDUCTOR_CLI_NAME || "conductor diagnose";
// Per-request HTTP timeout used when --timeout-ms is absent or invalid.
const DEFAULT_TIMEOUT_MS = 8000;

// Command line: one positional <task-id> plus config/output/timeout options.
const args = yargs(hideBin(process.argv))
  .scriptName(CLI_NAME)
  .usage("Usage: $0 <task-id> [options]")
  .option("config-file", {
    type: "string",
    describe: "Path to Conductor config file",
  })
  .option("json", {
    type: "boolean",
    default: false,
    describe: "Print raw JSON output",
  })
  .option("timeout-ms", {
    type: "number",
    default: DEFAULT_TIMEOUT_MS,
    describe: "HTTP timeout in milliseconds",
  })
  .example("$0 3b3e09b8-6d6f-4f3b-9f2f-3fca86b9e3cb", "Diagnose a task")
  .example("$0 3b3e09b8-6d6f-4f3b-9f2f-3fca86b9e3cb --json", "Diagnose and print JSON")
  .demandCommand(1, "task-id is required")
  .help()
  .strictOptions()
  .parse();

// `??` rather than `||`: a positional that was parsed as the number 0 is a
// present (if unusual) task id and must not be reported as missing.
const taskId = String(args._[0] ?? "").trim();
if (!taskId) {
  process.stderr.write("task-id is required\n");
  process.exit(1);
}

// `main` is a hoisted function declaration, so calling it here is safe.
main().catch((error) => {
  process.stderr.write(`diagnose failed: ${error?.message || error}\n`);
  process.exit(1);
});
46
+
47
/**
 * Runs the diagnosis for the task id given on the command line.
 *
 * Queries the backend's dedicated diagnostics endpoint first. When that
 * endpoint answers 404 the report is assembled client-side via
 * `runFallbackDiagnosis`; any other non-OK status is raised as an error.
 * The report is printed as JSON (`--json`) or as a human-readable summary.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the config has no backend URL or a request fails.
 */
async function main() {
  const config = loadConfig(args.configFile);
  const timeoutMs = normalizePositiveInt(args.timeoutMs, DEFAULT_TIMEOUT_MS);
  // Strip trailing slashes so the path concatenation below never doubles them.
  const baseUrl = String(config.backendUrl || "").replace(/\/+$/, "");
  if (!baseUrl) {
    // Without this guard the endpoint would be a relative path and the request
    // would fail later with an unrelated-looking URL error.
    throw new Error("backendUrl is missing from the Conductor config");
  }
  const endpoint = `${baseUrl}/api/diagnostics/tasks/${encodeURIComponent(taskId)}`;

  const full = await fetchJson(endpoint, config.agentToken, timeoutMs);
  let report;
  if (full.ok) {
    report = {
      mode: "full",
      endpoint,
      payload: full.body,
    };
  } else if (full.status === 404) {
    const fallback = await runFallbackDiagnosis(baseUrl, config.agentToken, taskId, timeoutMs);
    report = {
      mode: "fallback",
      endpoint,
      payload: fallback,
      note: "backend does not expose /api/diagnostics/tasks/:taskId yet",
    };
  } else {
    // Prefer the backend's own error text when the body is a JSON object.
    const body = full.body && typeof full.body === "object" ? full.body : null;
    const msg = body?.error || body?.message || `HTTP ${full.status}`;
    throw new Error(`diagnostics request failed: ${msg}`);
  }

  if (args.json) {
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    return;
  }

  printReport(taskId, report);
}
82
+
83
/**
 * Builds a diagnosis client-side from the task, its messages and the agent
 * list, for backends that lack the dedicated diagnostics endpoint.
 *
 * @param {string} baseUrl - Backend base URL without a trailing slash.
 * @param {string} token - Bearer token forwarded to `fetchJson`.
 * @param {string} taskId - Task to diagnose.
 * @param {number} timeoutMs - Per-request timeout in milliseconds.
 * @returns {Promise<object>} `{ task, messages, realtime, diagnosis }`.
 * @throws {Error} When any of the three backing requests is not OK.
 */
async function runFallbackDiagnosis(baseUrl, token, taskId, timeoutMs) {
  const taskUrl = `${baseUrl}/api/tasks/${encodeURIComponent(taskId)}`;

  const taskResp = await fetchJson(taskUrl, token, timeoutMs);
  if (!taskResp.ok) {
    throw new Error(`fallback: failed to fetch task (${taskResp.status})`);
  }

  const msgResp = await fetchJson(`${taskUrl}/messages`, token, timeoutMs);
  if (!msgResp.ok) {
    throw new Error(`fallback: failed to fetch messages (${msgResp.status})`);
  }

  const agentsResp = await fetchJson(`${baseUrl}/api/agents`, token, timeoutMs);
  if (!agentsResp.ok) {
    throw new Error(`fallback: failed to fetch agents (${agentsResp.status})`);
  }

  const task = taskResp.body || {};
  const messages = Array.isArray(msgResp.body) ? msgResp.body : [];
  const agents = Array.isArray(agentsResp.body) ? agentsResp.body : [];

  // Both snake_case and camelCase timestamp fields are accepted.
  const createdAtMs = (message) => toMs(message?.created_at || message?.createdAt);

  // Single pass for the newest message among the given roles. A missing or
  // unparseable timestamp ranks as oldest; a sort comparator would return NaN
  // for such entries, which makes the resulting order unreliable.
  const pickLatest = (roles) => {
    let latest = null;
    let latestRank = -Infinity;
    for (const message of messages) {
      if (!roles.includes(normalizeRole(message?.role))) {
        continue;
      }
      const ms = createdAtMs(message);
      const rank = Number.isFinite(ms) ? ms : -Infinity;
      if (latest === null || rank > latestRank) {
        latest = message;
        latestRank = rank;
      }
    }
    return latest;
  };

  const latestUser = pickLatest(["user"]);
  const latestSdk = pickLatest(["sdk", "assistant"]);

  const latestUserMs = createdAtMs(latestUser);
  const latestSdkMs = createdAtMs(latestSdk);
  // A user message is pending when nothing from the sdk/assistant side is newer.
  const hasPendingUser = Boolean(latestUser && (!latestSdk || latestUserMs > latestSdkMs));
  // Clamp at 0 so clock skew between this host and the backend cannot yield a
  // negative age.
  const pendingAgeMs =
    hasPendingUser && Number.isFinite(latestUserMs) ? Math.max(0, Date.now() - latestUserMs) : null;
  const latestSdkFailureKey = detectExecutionFailureLoopKey(latestSdk?.content);

  const assignedHost = cleanText(task?.agent_host || task?.agentHost);
  const assignedConnected = Boolean(
    assignedHost && agents.some((agent) => cleanText(agent?.host) === assignedHost),
  );

  const taskStatus = normalizeTaskStatus(task?.status);
  const diagnosis = classifyFallback({
    taskStatus,
    hasPendingUser,
    pendingAgeMs,
    latestSdkFailureKey,
    assignedHost,
    assignedConnected,
  });

  return {
    task: {
      id: task?.id || taskId,
      status: taskStatus,
      agent_host: assignedHost || null,
    },
    messages: {
      total: messages.length,
      latest_user: latestUser,
      latest_sdk: latestSdk,
      latest_sdk_failure_key: latestSdkFailureKey,
      has_pending_user: hasPendingUser,
      pending_age_ms: pendingAgeMs,
    },
    realtime: {
      connected_agents: agents,
      assigned_host_connected: assignedConnected,
    },
    diagnosis,
  };
}
158
+
159
/**
 * Turns the signals gathered by the fallback diagnosis into a verdict.
 *
 * Checks run in priority order: terminal task, no pending user message
 * (with or without an execution-failure signature), assigned agent offline,
 * pending for more than 120 s, and finally "probably still processing".
 *
 * @param {object} input - `{ taskStatus, hasPendingUser, pendingAgeMs,
 *   latestSdkFailureKey, assignedHost, assignedConnected }`.
 * @returns {{code: string, confidence: string, summary: string,
 *   reasons: string[], next_actions: string[]}}
 */
function classifyFallback(input) {
  const {
    taskStatus,
    hasPendingUser,
    pendingAgeMs,
    latestSdkFailureKey,
    assignedHost,
    assignedConnected,
  } = input;

  // Keeps the key order of every verdict object uniform.
  const verdict = (code, confidence, summary, reasons, next_actions = []) => ({
    code,
    confidence,
    summary,
    reasons,
    next_actions,
  });
  const ageIsKnown = typeof pendingAgeMs === "number";
  const ageLabel = () => `pending age ${Math.round(pendingAgeMs / 1000)}s`;

  if (["completed", "killed"].includes(taskStatus)) {
    return verdict("task_terminal", "high", `task is already ${taskStatus}`, [
      `task.status=${taskStatus}`,
    ]);
  }

  if (!hasPendingUser) {
    const notNewer = "latest user message is not newer than latest sdk message";
    if (!latestSdkFailureKey) {
      return verdict(
        "no_pending_user",
        "high",
        "no pending user message newer than last sdk reply",
        [notNewer],
      );
    }
    return verdict(
      "execution_failure_loop",
      "high",
      "latest sdk message indicates execution-layer failure loop",
      [notNewer, `latest sdk failure key=${latestSdkFailureKey}`],
      [
        "restart task-scoped fire process and verify PTY child cleanup",
        "if repeated, create a new task and terminate old stuck task",
      ],
    );
  }

  if (!assignedConnected) {
    const hostReason = assignedHost
      ? `assigned host ${assignedHost} is not connected`
      : "task has no assigned host";
    return verdict(
      "likely_ws_or_routing_issue",
      "high",
      "pending user message and assigned agent is offline",
      ["latest user message is pending", hostReason],
      [
        "check fire/daemon websocket reconnect logs",
        "verify task binding and outbox delivery on backend",
      ],
    );
  }

  if (ageIsKnown && pendingAgeMs > 120000) {
    return verdict(
      "likely_runturn_stuck",
      "medium",
      "agent is online but pending user message has waited >120s",
      ["assigned host is still connected", ageLabel()],
      [
        "inspect fire log for repeated turn waiting on same replyTo",
        "enable CONDUCTOR_TUI_TRACE=1 to confirm driver state progression",
      ],
    );
  }

  return verdict(
    "pending_but_processing",
    "low",
    "pending user message exists but may still be processing",
    ["assigned host is connected", ageIsKnown ? ageLabel() : "pending age unknown"],
  );
}
238
+
239
/**
 * Writes the human-readable form of a diagnosis report to stdout.
 *
 * Accepts both snake_case and camelCase field names in the payload and
 * substitutes "unknown" / "n/a" for missing values.
 *
 * @param {string} taskId - Fallback id shown when the payload has no task id.
 * @param {object} report - `{ mode, note?, payload }` as built by `main`.
 * @returns {void}
 */
function printReport(taskId, report) {
  const out = (text) => process.stdout.write(text);
  const bullets = (title, items) => {
    if (items.length === 0) {
      return;
    }
    out(`\n${title}:\n`);
    items.forEach((item) => out(`- ${item}\n`));
  };

  const payload = report?.payload || {};
  const diagnosis = payload?.diagnosis || {};
  const task = payload?.task || {};
  const realtime = payload?.realtime || {};
  const messages = payload?.messages || {};

  out(`Task: ${task?.id || taskId}\n`);
  const noteSuffix = report.note ? ` (${report.note})` : "";
  out(`Mode: ${report.mode}${noteSuffix}\n`);
  out(`Verdict: ${String(diagnosis.code || "unknown")} (${String(diagnosis.confidence || "unknown")})\n`);
  out(`Summary: ${String(diagnosis.summary || "n/a")}\n`);
  out("\n");
  out("Signals:\n");
  out(`- task.status: ${String(task?.status || "unknown")}\n`);
  out(`- task.agent_host: ${String(task?.agent_host || task?.agentHost || "n/a")}\n`);
  out(`- bound_agent_host: ${String(realtime?.bound_agent_host || realtime?.boundAgentHost || "n/a")}\n`);

  const pendingFlag = Boolean(messages?.has_pending_user ?? messages?.hasPendingUser);
  const pendingAge = messages?.pending_age_ms ?? messages?.pendingAgeMs;
  // The age is appended only when it is an actual number.
  const ageSuffix = typeof pendingAge === "number" ? ` (${Math.round(pendingAge / 1000)}s)` : "";
  out(`- pending user: ${pendingFlag}${ageSuffix}\n`);

  if (payload?.outbox) {
    out(`- outbox.available: ${Boolean(payload.outbox.available)}\n`);
    const latest = payload.outbox.latest_for_pending_user;
    if (latest) {
      out(
        `- outbox.latest_for_pending_user: ${String(latest.status || "unknown")} (${String(
          latest.last_error || "no_error",
        )})\n`,
      );
    }
  }

  bullets("Reasons", Array.isArray(diagnosis.reasons) ? diagnosis.reasons : []);
  bullets("Next Actions", Array.isArray(diagnosis.next_actions) ? diagnosis.next_actions : []);
}
289
+
290
/**
 * Performs an authenticated GET and returns the status with the parsed body.
 *
 * Non-2xx responses are returned, not thrown, so callers can branch on
 * `status`. The request is aborted after `timeoutMs`.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} [token] - Bearer token; the Authorization header is omitted
 *   when this is empty rather than sending the literal "Bearer undefined".
 * @param {number} timeoutMs - Abort deadline in milliseconds.
 * @returns {Promise<{ok: boolean, status: number, body: any}>}
 * @throws {Error} On network failure, or with a "timed out" message (original
 *   error attached as `cause`) when the deadline aborted the request.
 */
async function fetchJson(url, token, timeoutMs) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  // The timer alone should not keep the process alive.
  if (typeof timer.unref === "function") {
    timer.unref();
  }

  const headers = { Accept: "application/json" };
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }

  try {
    const response = await fetch(url, {
      method: "GET",
      headers,
      signal: controller.signal,
    });
    const body = await parseJsonSafe(response);
    return {
      ok: response.ok,
      status: response.status,
      body,
    };
  } catch (error) {
    // Only the timer above can abort this controller, so an aborted signal
    // means the deadline was hit; say so instead of surfacing a bare abort.
    if (controller.signal.aborted) {
      throw new Error(`request timed out after ${timeoutMs}ms: ${url}`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
315
+
316
/**
 * Reads a response body and decodes it as JSON without ever throwing on
 * malformed content.
 *
 * @param {{text: () => Promise<string>}} response - Object exposing `text()`.
 * @returns {Promise<any>} `null` for an empty body, the parsed value for valid
 *   JSON, otherwise `{ raw: <body text> }`.
 */
async function parseJsonSafe(response) {
  const raw = await response.text();
  if (!raw) {
    return null;
  }
  let decoded;
  try {
    decoded = JSON.parse(raw);
  } catch {
    // Not JSON (e.g. an HTML error page): hand the text back untouched.
    decoded = { raw };
  }
  return decoded;
}
327
+
328
/**
 * Canonicalises a message role for comparison.
 *
 * @param {unknown} role - Raw role value.
 * @returns {string} Trimmed, lower-cased role; "" when `role` is not a string.
 */
function normalizeRole(role) {
  if (typeof role !== "string") {
    return "";
  }
  return role.trim().toLowerCase();
}
331
+
332
/**
 * Maps a raw task status onto the vocabulary used by the diagnosis.
 *
 * "failed" and "cancelled" are folded into "killed"; an empty or non-string
 * status becomes "unknown"; any other value is passed through trimmed and
 * lower-cased.
 *
 * @param {unknown} status - Raw status value.
 * @returns {string}
 */
function normalizeTaskStatus(status) {
  const key = typeof status === "string" ? status.trim().toLowerCase() : "";
  switch (key) {
    case "killed":
    case "failed":
    case "cancelled":
      return "killed";
    case "":
      return "unknown";
    default:
      // Includes "completed" and "running", which map to themselves.
      return key;
  }
}
339
+
340
/**
 * Returns the trimmed string, or "" for any non-string input.
 *
 * @param {unknown} value
 * @returns {string}
 */
function cleanText(value) {
  if (typeof value === "string") {
    return value.trim();
  }
  return "";
}
343
+
344
/**
 * Parses a timestamp into epoch milliseconds via `Date.parse`.
 *
 * @param {unknown} value - Timestamp; falsy values are treated as "".
 * @returns {number} Epoch milliseconds, or NaN when the value cannot be parsed.
 */
function toMs(value) {
  const text = value ? String(value) : "";
  const parsed = Date.parse(text);
  if (!Number.isFinite(parsed)) {
    return NaN;
  }
  return parsed;
}
348
+
349
/**
 * Looks for a known execution-failure signature in a message body.
 *
 * @param {unknown} value - Message content; non-strings never match.
 * @returns {string|null} A stable failure key, or null when nothing matches.
 */
function detectExecutionFailureLoopKey(value) {
  const haystack = typeof value === "string" ? value.trim().toLowerCase() : "";
  if (haystack === "") {
    return null;
  }
  // Checked in order. "cannot proceed: tui process has exited" contains the
  // shorter "tui process has exited", so a single entry covers both phrasings.
  const signatures = [
    ["pty session already spawned", "pty_session_already_spawned"],
    ["tui process has exited", "tui_process_exited"],
    ["execution_failure_loop", "execution_failure_loop"],
  ];
  const match = signatures.find(([needle]) => haystack.includes(needle));
  return match ? match[1] : null;
}
362
+
363
/**
 * Coerces a value to a positive integer using base-10 `parseInt`.
 *
 * @param {unknown} value - Candidate; null/undefined are treated as "".
 * @param {number} fallback - Returned when the candidate is not a finite
 *   integer greater than zero.
 * @returns {number}
 */
function normalizePositiveInt(value, fallback) {
  const text = String(value ?? "");
  const candidate = Number.parseInt(text, 10);
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return fallback;
  }
  return candidate;
}
@@ -103,6 +103,12 @@ const DEFAULT_POLL_INTERVAL_MS = parseInt(
103
103
  process.env.CONDUCTOR_CLI_POLL_INTERVAL_MS || process.env.CCODEX_POLL_INTERVAL_MS || "2000",
104
104
  10,
105
105
  );
106
const SECOND_MS = 1000;
const MINUTE_MS = 60 * SECOND_MS;

// Hard per-turn deadline: default value and the bounds an override is clamped to.
const DEFAULT_TURN_DEADLINE_MS = 12 * MINUTE_MS;
const MIN_TURN_DEADLINE_MS = 30 * SECOND_MS;
const MAX_TURN_DEADLINE_MS = 30 * MINUTE_MS;

// Repeated-error ("error loop") detection defaults: the window within which
// identical errors count as one streak, the report back-off once a loop is
// open, and how many repeats open a loop.
const DEFAULT_ERROR_LOOP_WINDOW_MS = 2 * MINUTE_MS;
const DEFAULT_ERROR_LOOP_BACKOFF_MS = 3 * MINUTE_MS;
const DEFAULT_ERROR_LOOP_THRESHOLD = 3;
106
112
 
107
113
  function appendFireLocalLog(line) {
108
114
  if (!ENABLE_FIRE_LOCAL_LOG) {
@@ -1027,6 +1033,39 @@ function sanitizeForLog(value, maxLen = 180) {
1027
1033
  return truncateText(String(value).replace(/\s+/g, " ").trim(), maxLen);
1028
1034
  }
1029
1035
 
1036
/**
 * Reads an integer from an environment variable and clamps it to [min, max].
 *
 * @param {string} envName - Environment variable to read.
 * @param {number} fallback - Used when the variable is unset or not an
 *   integer; it is rounded and clamped too. A non-numeric fallback yields `min`.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number}
 */
function getBoundedEnvInt(envName, fallback, min, max) {
  const clamp = (n) => Math.min(Math.max(n, min), max);

  const fromEnv = Number.parseInt(String(process.env[envName] ?? ""), 10);
  if (Number.isFinite(fromEnv)) {
    return clamp(fromEnv);
  }

  const fallbackNumber = Number(fallback);
  if (!Number.isFinite(fallbackNumber)) {
    return min;
  }
  return clamp(Math.round(fallbackNumber));
}
1048
+
1049
/**
 * Reduces an execution error message to a stable key so that repeated
 * occurrences of the same failure can be recognised.
 *
 * @param {unknown} errorMessage - Raw error text.
 * @returns {string} A well-known key for recognised failures,
 *   "unknown_error" for an empty message, otherwise the sanitised,
 *   lower-cased message itself.
 */
function normalizeExecutionErrorKey(errorMessage) {
  const normalized = sanitizeForLog(errorMessage, 280).toLowerCase();
  if (!normalized) {
    return "unknown_error";
  }
  if (normalized.includes("pty session already spawned")) {
    return "pty_session_already_spawned";
  }
  // "cannot proceed: tui process has exited" contains this phrase, so both
  // phrasings resolve to the same key. A separate branch for the longer phrase
  // used to follow this check; it could never be reached and was removed.
  if (normalized.includes("tui process has exited")) {
    return "tui_process_exited";
  }
  if (normalized.includes("turn exceeded hard deadline")) {
    return "turn_timeout";
  }
  return normalized;
}
1068
+
1030
1069
  function tailLines(value, count = 6) {
1031
1070
  if (!value) return "";
1032
1071
  const lines = String(value).split(/\r?\n/);
@@ -1045,6 +1084,7 @@ class TuiDriverSession {
1045
1084
  this.sessionId = resumeSessionId || `${backend}-${Date.now()}`;
1046
1085
  this.history = Array.isArray(options.initialHistory) ? [...options.initialHistory] : [];
1047
1086
  this.pendingHistorySeed = this.history.length > 0;
1087
+ this.sessionInfo = null;
1048
1088
 
1049
1089
  const allowCliList = options.configFile ? loadAllowCliList(options.configFile) : DEFAULT_ALLOW_CLI_LIST;
1050
1090
  const cliCommand = CUSTOM_CLI_COMMAND || allowCliList[backend] || backend;
@@ -1066,6 +1106,12 @@ class TuiDriverSession {
1066
1106
  this.closeRequested = false;
1067
1107
  this.closed = false;
1068
1108
  this.closeWaiters = new Set();
1109
+ this.turnDeadlineMs = getBoundedEnvInt(
1110
+ "CONDUCTOR_TURN_DEADLINE_MS",
1111
+ DEFAULT_TURN_DEADLINE_MS,
1112
+ MIN_TURN_DEADLINE_MS,
1113
+ MAX_TURN_DEADLINE_MS,
1114
+ );
1069
1115
 
1070
1116
  const profileName = profileNameForBackend(backend);
1071
1117
  if (!profileName) {
@@ -1129,6 +1175,10 @@ class TuiDriverSession {
1129
1175
  }
1130
1176
  log(`[${this.backend}] [WARN] Please run "${this.command} login" or authenticate manually.`);
1131
1177
  });
1178
+
1179
+ this.driver.on("session", (session) => {
1180
+ this.applySessionInfo(session);
1181
+ });
1132
1182
  }
1133
1183
 
1134
1184
  get threadId() {
@@ -1139,12 +1189,86 @@ class TuiDriverSession {
1139
1189
  return { model: this.backend };
1140
1190
  }
1141
1191
 
1192
+ applySessionInfo(session) {
1193
+ if (!session || typeof session !== "object") {
1194
+ return;
1195
+ }
1196
+ const sessionId = typeof session.sessionId === "string" ? session.sessionId.trim() : "";
1197
+ const sessionFilePath =
1198
+ typeof session.sessionFilePath === "string" ? session.sessionFilePath.trim() : "";
1199
+ if (!sessionId) {
1200
+ return;
1201
+ }
1202
+ this.sessionId = sessionId;
1203
+ this.sessionInfo = {
1204
+ backend: this.backend,
1205
+ sessionId,
1206
+ sessionFilePath: sessionFilePath || undefined,
1207
+ };
1208
+ this.trace(
1209
+ `session id=${sessionId} file="${sanitizeForLog(sessionFilePath, 180)}"`,
1210
+ );
1211
+ }
1212
+
1213
+ getSessionInfo() {
1214
+ if (this.sessionInfo) {
1215
+ return { ...this.sessionInfo };
1216
+ }
1217
+ return null;
1218
+ }
1219
+
1220
+ async ensureSessionInfo() {
1221
+ if (!this.driver) {
1222
+ return null;
1223
+ }
1224
+ try {
1225
+ await this.driver.boot();
1226
+ } catch (error) {
1227
+ this.trace(`session boot failed: ${sanitizeForLog(error?.message || error, 180)}`);
1228
+ return this.getSessionInfo();
1229
+ }
1230
+
1231
+ try {
1232
+ if (typeof this.driver.ensureSessionInfo === "function") {
1233
+ const detected = await this.driver.ensureSessionInfo();
1234
+ this.applySessionInfo(detected);
1235
+ } else if (typeof this.driver.getSessionInfo === "function") {
1236
+ this.applySessionInfo(this.driver.getSessionInfo());
1237
+ }
1238
+ } catch (error) {
1239
+ this.trace(`session detect failed: ${sanitizeForLog(error?.message || error, 180)}`);
1240
+ }
1241
+
1242
+ return this.getSessionInfo();
1243
+ }
1244
+
1245
+ async getSessionUsageSummary() {
1246
+ if (!this.driver || typeof this.driver.getSessionUsageSummary !== "function") {
1247
+ return null;
1248
+ }
1249
+ try {
1250
+ const summary = await this.driver.getSessionUsageSummary();
1251
+ return summary && typeof summary === "object" ? summary : null;
1252
+ } catch (error) {
1253
+ this.trace(`session usage detect failed: ${sanitizeForLog(error?.message || error, 180)}`);
1254
+ return null;
1255
+ }
1256
+ }
1257
+
1142
1258
  createSessionClosedError() {
1143
1259
  const error = new Error("TUI session closed");
1144
1260
  error.reason = "session_closed";
1145
1261
  return error;
1146
1262
  }
1147
1263
 
1264
+ createTurnTimeoutError(timeoutMs) {
1265
+ const seconds = Math.max(1, Math.round(timeoutMs / 1000));
1266
+ const error = new Error(`Turn exceeded hard deadline (${seconds}s)`);
1267
+ error.reason = "turn_timeout";
1268
+ error.timeoutMs = timeoutMs;
1269
+ return error;
1270
+ }
1271
+
1148
1272
  createCloseGuard() {
1149
1273
  if (this.closeRequested) {
1150
1274
  return {
@@ -1169,6 +1293,32 @@ class TuiDriverSession {
1169
1293
  };
1170
1294
  }
1171
1295
 
1296
+ createTurnTimeoutGuard() {
1297
+ if (!Number.isFinite(this.turnDeadlineMs) || this.turnDeadlineMs <= 0) {
1298
+ return {
1299
+ promise: new Promise(() => {}),
1300
+ cleanup: () => {},
1301
+ };
1302
+ }
1303
+ let timer = null;
1304
+ const promise = new Promise((_, reject) => {
1305
+ timer = setTimeout(() => {
1306
+ reject(this.createTurnTimeoutError(this.turnDeadlineMs));
1307
+ }, this.turnDeadlineMs);
1308
+ if (typeof timer.unref === "function") {
1309
+ timer.unref();
1310
+ }
1311
+ });
1312
+ return {
1313
+ promise,
1314
+ cleanup: () => {
1315
+ if (timer) {
1316
+ clearTimeout(timer);
1317
+ }
1318
+ },
1319
+ };
1320
+ }
1321
+
1172
1322
  flushCloseWaiters() {
1173
1323
  if (!this.closeWaiters || this.closeWaiters.size === 0) {
1174
1324
  return;
@@ -1364,9 +1514,10 @@ class TuiDriverSession {
1364
1514
  }
1365
1515
  }
1366
1516
  const closeGuard = this.createCloseGuard();
1517
+ const turnTimeoutGuard = this.createTurnTimeoutGuard();
1367
1518
 
1368
1519
  try {
1369
- const result = await Promise.race([this.driver.ask(effectivePrompt), closeGuard.promise]);
1520
+ const result = await Promise.race([this.driver.ask(effectivePrompt), closeGuard.promise, turnTimeoutGuard.promise]);
1370
1521
  const answer = String(result.answer || result.replyText || "").trim();
1371
1522
  this.trace(
1372
1523
  `runTurn finished success=${Boolean(result.success)} elapsedMs=${result.elapsedMs} answerLen=${answer.length} state=${this.driver.state}`,
@@ -1415,8 +1566,23 @@ class TuiDriverSession {
1415
1566
  throw error instanceof Error ? error : new Error(errorMessage);
1416
1567
  }
1417
1568
 
1418
- // 特殊处理登录和权限错误
1419
- if (errorReason === "login_required") {
1569
+ if (errorReason === "turn_timeout") {
1570
+ this.emitProgress(onProgress, {
1571
+ state: "ERROR",
1572
+ phase: "timeout_recovered",
1573
+ source: "tui-driver",
1574
+ error: errorMessage,
1575
+ reason: errorReason,
1576
+ timeout_ms: error?.timeoutMs,
1577
+ });
1578
+ log(`[${this.backend}] Turn timed out (${error?.timeoutMs || this.turnDeadlineMs}ms), restarting TUI session`);
1579
+ try {
1580
+ await this.driver.forceRestart();
1581
+ } catch (restartError) {
1582
+ log(`[${this.backend}] Failed to restart TUI after timeout: ${restartError?.message || restartError}`);
1583
+ }
1584
+ log(`[${this.backend}] Error: ${errorMessage}`);
1585
+ } else if (errorReason === "login_required") {
1420
1586
  this.emitProgress(onProgress, {
1421
1587
  state: "ERROR",
1422
1588
  phase: "login_required",
@@ -1469,6 +1635,7 @@ class TuiDriverSession {
1469
1635
  }
1470
1636
  clearInterval(signalTimer);
1471
1637
  this.driver.off("stateChange", handleStateChange);
1638
+ turnTimeoutGuard.cleanup();
1472
1639
  closeGuard.cleanup();
1473
1640
  this.trace(`runTurn cleanup state=${this.driver.state}`);
1474
1641
  }
@@ -1503,10 +1670,45 @@ export class BridgeRunner {
1503
1670
  this.stopped = false;
1504
1671
  this.runningTurn = false;
1505
1672
  this.processedMessageIds = new Set();
1673
+ this.inFlightMessageIds = new Set();
1506
1674
  this.lastRuntimeStatusSignature = null;
1507
1675
  this.lastRuntimeStatusPayload = null;
1676
+ this.runtimeContextSnapshot = null;
1677
+ this.runtimeContextSnapshotAt = 0;
1678
+ this.runtimeContextInFlight = null;
1679
+ this.runtimeContextRefreshMs = getBoundedEnvInt(
1680
+ "CONDUCTOR_RUNTIME_CONTEXT_REFRESH_MS",
1681
+ 2000,
1682
+ 500,
1683
+ 60 * 1000,
1684
+ );
1685
+ this.daemonName =
1686
+ (typeof process.env.CONDUCTOR_AGENT_NAME === "string" && process.env.CONDUCTOR_AGENT_NAME.trim()) ||
1687
+ (typeof process.env.CONDUCTOR_DAEMON_NAME === "string" && process.env.CONDUCTOR_DAEMON_NAME.trim()) ||
1688
+ (typeof process.env.HOSTNAME === "string" && process.env.HOSTNAME.trim()) ||
1689
+ os.hostname();
1508
1690
  this.needsReconnectRecovery = false;
1509
1691
  this.remoteStopInfo = null;
1692
+ this.sessionAnnouncementSent = false;
1693
+ this.errorLoop = null;
1694
+ this.errorLoopWindowMs = getBoundedEnvInt(
1695
+ "CONDUCTOR_ERROR_LOOP_WINDOW_MS",
1696
+ DEFAULT_ERROR_LOOP_WINDOW_MS,
1697
+ 15 * 1000,
1698
+ 30 * 60 * 1000,
1699
+ );
1700
+ this.errorLoopBackoffMs = getBoundedEnvInt(
1701
+ "CONDUCTOR_ERROR_LOOP_BACKOFF_MS",
1702
+ DEFAULT_ERROR_LOOP_BACKOFF_MS,
1703
+ 15 * 1000,
1704
+ 60 * 60 * 1000,
1705
+ );
1706
+ this.errorLoopThreshold = getBoundedEnvInt(
1707
+ "CONDUCTOR_ERROR_LOOP_THRESHOLD",
1708
+ DEFAULT_ERROR_LOOP_THRESHOLD,
1709
+ 2,
1710
+ 20,
1711
+ );
1510
1712
  }
1511
1713
 
1512
1714
  copilotLog(message) {
@@ -1516,6 +1718,54 @@ export class BridgeRunner {
1516
1718
  log(`[copilot-debug] task=${this.taskId} ${message}`);
1517
1719
  }
1518
1720
 
1721
+ async announceBackendSession() {
1722
+ if (this.sessionAnnouncementSent) {
1723
+ return;
1724
+ }
1725
+ if (!this.backendSession || typeof this.backendSession.ensureSessionInfo !== "function") {
1726
+ return;
1727
+ }
1728
+ let sessionInfo = null;
1729
+ try {
1730
+ sessionInfo = await this.backendSession.ensureSessionInfo();
1731
+ } catch (error) {
1732
+ this.copilotLog(`session announce skipped: ${sanitizeForLog(error?.message || error, 160)}`);
1733
+ return;
1734
+ }
1735
+ const sessionId = String(sessionInfo?.sessionId || "").trim();
1736
+ const sessionFilePath = sessionInfo?.sessionFilePath ? String(sessionInfo.sessionFilePath).trim() : "";
1737
+ const hasRealSessionId = Boolean(sessionId);
1738
+ const message = hasRealSessionId
1739
+ ? `${this.backendName} session started: ${sessionId}`
1740
+ : `${this.backendName} session started`;
1741
+ try {
1742
+ await this.conductor.sendMessage(this.taskId, message, {
1743
+ backend: this.backendName,
1744
+ thread_id: hasRealSessionId ? sessionId : undefined,
1745
+ session_id: hasRealSessionId ? sessionId : undefined,
1746
+ session_file_path: sessionFilePath || undefined,
1747
+ cli_args: this.cliArgs,
1748
+ synthetic: true,
1749
+ });
1750
+ this.sessionAnnouncementSent = true;
1751
+ this.copilotLog(hasRealSessionId ? `session announced id=${sessionId}` : "session announced without id");
1752
+ await this.reportRuntimeStatus(
1753
+ {
1754
+ state: "WAIT_READY",
1755
+ phase: "session_started",
1756
+ source: "tui-driver",
1757
+ reply_in_progress: false,
1758
+ status_done_line: `${this.backendName} session started`,
1759
+ backend: this.backendName,
1760
+ thread_id: hasRealSessionId ? sessionId : undefined,
1761
+ },
1762
+ undefined,
1763
+ );
1764
+ } catch (error) {
1765
+ log(`Failed to send session announcement: ${error?.message || error}`);
1766
+ }
1767
+ }
1768
+
1519
1769
  async start(abortSignal) {
1520
1770
  abortSignal?.addEventListener("abort", () => {
1521
1771
  this.stopped = true;
@@ -1524,6 +1774,11 @@ export class BridgeRunner {
1524
1774
  return;
1525
1775
  }
1526
1776
 
1777
+ await this.announceBackendSession();
1778
+ if (this.stopped) {
1779
+ return;
1780
+ }
1781
+
1527
1782
  if (this.initialPrompt) {
1528
1783
  this.copilotLog("processing initial prompt");
1529
1784
  await this.handleSyntheticMessage(this.initialPrompt, {
@@ -1779,7 +2034,80 @@ export class BridgeRunner {
1779
2034
  }
1780
2035
  }
1781
2036
 
1782
- createRuntimeStatus(payload, replyTo) {
2037
+ normalizePercent(value) {
2038
+ if (!Number.isFinite(value)) {
2039
+ return undefined;
2040
+ }
2041
+ if (value < 0) {
2042
+ return 0;
2043
+ }
2044
+ if (value > 100) {
2045
+ return 100;
2046
+ }
2047
+ return value;
2048
+ }
2049
+
2050
+ async resolveRuntimeContext() {
2051
+ const now = Date.now();
2052
+ if (
2053
+ this.runtimeContextSnapshot &&
2054
+ now - this.runtimeContextSnapshotAt < this.runtimeContextRefreshMs
2055
+ ) {
2056
+ return this.runtimeContextSnapshot;
2057
+ }
2058
+ if (this.runtimeContextInFlight) {
2059
+ return this.runtimeContextInFlight;
2060
+ }
2061
+
2062
+ this.runtimeContextInFlight = (async () => {
2063
+ let sessionInfo = null;
2064
+ try {
2065
+ if (typeof this.backendSession?.getSessionInfo === "function") {
2066
+ sessionInfo = this.backendSession.getSessionInfo();
2067
+ }
2068
+ } catch {
2069
+ sessionInfo = null;
2070
+ }
2071
+
2072
+ let usage = null;
2073
+ try {
2074
+ if (typeof this.backendSession?.getSessionUsageSummary === "function") {
2075
+ usage = await this.backendSession.getSessionUsageSummary();
2076
+ }
2077
+ } catch {
2078
+ usage = null;
2079
+ }
2080
+
2081
+ const resolvedSessionId = String(
2082
+ usage?.sessionId || sessionInfo?.sessionId || "",
2083
+ ).trim();
2084
+ const resolvedSessionFilePath = String(
2085
+ usage?.sessionFilePath || sessionInfo?.sessionFilePath || "",
2086
+ ).trim();
2087
+ const tokenUsagePercent = this.normalizePercent(Number(usage?.tokenUsagePercent));
2088
+ const contextUsagePercent = this.normalizePercent(Number(usage?.contextUsagePercent));
2089
+
2090
+ const snapshot = {
2091
+ daemon: this.daemonName || undefined,
2092
+ pid: process.pid,
2093
+ session_id: resolvedSessionId || undefined,
2094
+ session_file_path: resolvedSessionFilePath || undefined,
2095
+ token_usage_percent: tokenUsagePercent,
2096
+ context_usage_percent: contextUsagePercent,
2097
+ };
2098
+ this.runtimeContextSnapshot = snapshot;
2099
+ this.runtimeContextSnapshotAt = Date.now();
2100
+ return snapshot;
2101
+ })();
2102
+
2103
+ try {
2104
+ return await this.runtimeContextInFlight;
2105
+ } finally {
2106
+ this.runtimeContextInFlight = null;
2107
+ }
2108
+ }
2109
+
2110
+ createRuntimeStatus(payload, replyTo, runtimeContext = null) {
1783
2111
  if (!payload || typeof payload !== "object") {
1784
2112
  return null;
1785
2113
  }
@@ -1804,12 +2132,22 @@ export class BridgeRunner {
1804
2132
  reply_preview: truncateText(replyPreview, 240) || undefined,
1805
2133
  reply_to: replyTo,
1806
2134
  backend: this.backendName,
1807
- thread_id: this.backendSession.threadId,
2135
+ thread_id:
2136
+ String(
2137
+ payload.thread_id || runtimeContext?.session_id || "",
2138
+ ).trim() || undefined,
2139
+ daemon: runtimeContext?.daemon,
2140
+ pid: runtimeContext?.pid,
2141
+ session_id: runtimeContext?.session_id,
2142
+ session_file_path: runtimeContext?.session_file_path,
2143
+ token_usage_percent: runtimeContext?.token_usage_percent,
2144
+ context_usage_percent: runtimeContext?.context_usage_percent,
1808
2145
  };
1809
2146
  }
1810
2147
 
1811
2148
  async reportRuntimeStatus(payload, replyTo) {
1812
- const runtime = this.createRuntimeStatus(payload, replyTo);
2149
+ const runtimeContext = await this.resolveRuntimeContext();
2150
+ const runtime = this.createRuntimeStatus(payload, replyTo, runtimeContext);
1813
2151
  if (!runtime) {
1814
2152
  return;
1815
2153
  }
@@ -1852,6 +2190,67 @@ export class BridgeRunner {
1852
2190
  }
1853
2191
  }
1854
2192
 
2193
+ resetErrorLoop() {
2194
+ this.errorLoop = null;
2195
+ }
2196
+
2197
+ evaluateErrorLoop(errorMessage) {
2198
+ const normalizedKey = normalizeExecutionErrorKey(errorMessage);
2199
+ const now = Date.now();
2200
+ const current = this.errorLoop;
2201
+
2202
+ if (
2203
+ !current ||
2204
+ current.key !== normalizedKey ||
2205
+ now - current.lastAt > this.errorLoopWindowMs
2206
+ ) {
2207
+ this.errorLoop = {
2208
+ key: normalizedKey,
2209
+ count: 1,
2210
+ lastAt: now,
2211
+ cooldownUntil: 0,
2212
+ };
2213
+ return {
2214
+ key: normalizedKey,
2215
+ count: 1,
2216
+ open: false,
2217
+ suppressReport: false,
2218
+ cooldownMs: 0,
2219
+ };
2220
+ }
2221
+
2222
+ current.count += 1;
2223
+ current.lastAt = now;
2224
+ const open = current.count >= this.errorLoopThreshold;
2225
+ let suppressReport = false;
2226
+
2227
+ if (open) {
2228
+ if (current.cooldownUntil > now) {
2229
+ suppressReport = true;
2230
+ } else {
2231
+ current.cooldownUntil = now + this.errorLoopBackoffMs;
2232
+ }
2233
+ }
2234
+
2235
+ this.errorLoop = current;
2236
+ return {
2237
+ key: normalizedKey,
2238
+ count: current.count,
2239
+ open,
2240
+ suppressReport,
2241
+ cooldownMs: suppressReport ? current.cooldownUntil - now : 0,
2242
+ };
2243
+ }
2244
+
2245
+ isExecutionFailureLoopError(errorMessage) {
2246
+ const normalized = String(errorMessage || "").toLowerCase();
2247
+ return (
2248
+ normalized.includes("pty session already spawned") ||
2249
+ normalized.includes("tui process has exited") ||
2250
+ normalized.includes("cannot proceed: tui process has exited")
2251
+ );
2252
+ }
2253
+
1855
2254
  async respondToMessage(message) {
1856
2255
  const content = String(message.content || "").trim();
1857
2256
  if (!content) {
@@ -1863,6 +2262,13 @@ export class BridgeRunner {
1863
2262
  this.copilotLog(`skip duplicated message replyTo=${replyTo}`);
1864
2263
  return;
1865
2264
  }
2265
+ if (replyTo && this.inFlightMessageIds.has(replyTo)) {
2266
+ this.copilotLog(`skip in-flight duplicated message replyTo=${replyTo}`);
2267
+ return;
2268
+ }
2269
+ if (replyTo) {
2270
+ this.inFlightMessageIds.add(replyTo);
2271
+ }
1866
2272
  this.lastRuntimeStatusSignature = null;
1867
2273
  this.runningTurn = true;
1868
2274
  const turnStartedAt = Date.now();
@@ -1932,6 +2338,7 @@ export class BridgeRunner {
1932
2338
  if (replyTo) {
1933
2339
  this.processedMessageIds.add(replyTo);
1934
2340
  }
2341
+ this.resetErrorLoop();
1935
2342
  this.copilotLog(`sdk_message sent replyTo=${replyTo || "latest"} responseLen=${responseText.length}`);
1936
2343
  } catch (error) {
1937
2344
  const errorMessage = error instanceof Error ? error.message : String(error);
@@ -1953,11 +2360,48 @@ export class BridgeRunner {
1953
2360
  },
1954
2361
  replyTo,
1955
2362
  );
1956
- await this.reportError(`${this.backendName} 处理失败: ${errorMessage}`, replyTo);
2363
+ const isExecutionFailure = this.isExecutionFailureLoopError(errorMessage);
2364
+ const loopState = isExecutionFailure
2365
+ ? this.evaluateErrorLoop(errorMessage)
2366
+ : {
2367
+ key: "non_execution_error",
2368
+ count: 1,
2369
+ open: false,
2370
+ suppressReport: false,
2371
+ cooldownMs: 0,
2372
+ };
2373
+ if (!isExecutionFailure) {
2374
+ this.resetErrorLoop();
2375
+ }
2376
+ const executionFailureLoop = isExecutionFailure && loopState.open;
2377
+ if (executionFailureLoop) {
2378
+ await this.reportRuntimeStatus(
2379
+ {
2380
+ state: "ERROR",
2381
+ phase: "execution_failure_loop",
2382
+ reply_in_progress: false,
2383
+ status_done_line: `${this.backendName} execution_failure_loop`,
2384
+ },
2385
+ replyTo,
2386
+ );
2387
+ }
2388
+ if (isExecutionFailure && loopState.suppressReport) {
2389
+ this.copilotLog(
2390
+ `suppress repeated error report key=${loopState.key} count=${loopState.count} cooldownMs=${loopState.cooldownMs}`,
2391
+ );
2392
+ return;
2393
+ }
2394
+ const reportMessage = executionFailureLoop
2395
+ ? `${this.backendName} 执行层失败循环(${loopState.key}, 连续${loopState.count}次): ${errorMessage}`
2396
+ : `${this.backendName} 处理失败: ${errorMessage}`;
2397
+ await this.reportError(reportMessage, replyTo);
1957
2398
  } finally {
1958
2399
  if (turnWatchdog) {
1959
2400
  clearInterval(turnWatchdog);
1960
2401
  }
2402
+ if (replyTo) {
2403
+ this.inFlightMessageIds.delete(replyTo);
2404
+ }
1961
2405
  this.copilotLog(
1962
2406
  `turn end replyTo=${replyTo || "latest"} elapsedMs=${Date.now() - turnStartedAt} processedIds=${this.processedMessageIds.size}`,
1963
2407
  );
package/bin/conductor.js CHANGED
@@ -8,6 +8,7 @@
8
8
  * daemon - Start long-running daemon for task orchestration
9
9
  * config - Interactive configuration setup
10
10
  * update - Update the CLI to the latest version
11
+ * diagnose - Diagnose a task in production/backend
11
12
  */
12
13
 
13
14
  import { fileURLToPath } from "node:url";
@@ -43,7 +44,7 @@ if (argv[0] === "--version" || argv[0] === "-v") {
43
44
  const subcommand = argv[0];
44
45
 
45
46
  // Valid subcommands
46
- const validSubcommands = ["fire", "daemon", "config", "update"];
47
+ const validSubcommands = ["fire", "daemon", "config", "update", "diagnose"];
47
48
 
48
49
  if (!validSubcommands.includes(subcommand)) {
49
50
  console.error(`Error: Unknown subcommand '${subcommand}'`);
@@ -85,6 +86,7 @@ Subcommands:
85
86
  daemon Start long-running daemon for task orchestration
86
87
  config Interactive configuration setup
87
88
  update Update the CLI to the latest version
89
+ diagnose Diagnose a task and print likely root cause
88
90
 
89
91
  Options:
90
92
  -h, --help Show this help message
@@ -94,6 +96,7 @@ Examples:
94
96
  conductor fire -- "fix the bug"
95
97
  conductor fire --backend claude -- "add feature"
96
98
  conductor daemon --config-file ~/.conductor/config.yaml
99
+ conductor diagnose <task-id>
97
100
  conductor config
98
101
  conductor update
99
102
 
@@ -102,6 +105,7 @@ For subcommand-specific help:
102
105
  conductor daemon --help
103
106
  conductor config --help
104
107
  conductor update --help
108
+ conductor diagnose --help
105
109
 
106
110
  Version: ${pkgJson.version}
107
111
  `);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@love-moon/conductor-cli",
3
- "version": "0.2.12",
3
+ "version": "0.2.13",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "conductor": "bin/conductor.js"
@@ -16,8 +16,8 @@
16
16
  "test": "node --test"
17
17
  },
18
18
  "dependencies": {
19
- "@love-moon/tui-driver": "0.2.12",
20
- "@love-moon/conductor-sdk": "0.2.12",
19
+ "@love-moon/tui-driver": "0.2.13",
20
+ "@love-moon/conductor-sdk": "0.2.13",
21
21
  "dotenv": "^16.4.5",
22
22
  "enquirer": "^2.4.1",
23
23
  "js-yaml": "^4.1.1",