openclaw-scheduler 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
+ import { readFileSync } from 'fs';
2
+ import { isatty } from 'node:tty';
3
+
4
/**
 * Coerce a parsed CLI flag value into a string.
 *
 * @param {*} value - Raw flag value as produced by the flag parser.
 * @param {string} flagName - Flag name used in the error message (e.g. '--message').
 * @returns {string|null} `String(value)`, or null when the flag is absent (undefined/null).
 * @throws {Error} When `value === true`, i.e. the flag was passed without a value.
 */
function normalizeFlagValue(value, flagName) {
  if (value === undefined || value === null) return null;
  if (value === true) throw new Error(`${flagName} requires a value`);
  return String(value);
}

/**
 * Read stdin for a source the caller explicitly selected.
 *
 * @param {Function} readStdin - Reader returning the stdin text (sync or async).
 * @param {boolean} stdinIsTTY - Whether stdin is an interactive terminal.
 * @param {string} sourceLabel - Flag spelling used in error messages.
 * @returns {Promise<string>} The text produced by `readStdin`.
 * @throws {Error} When stdin is a TTY, or when the read fails (original error in `cause`).
 */
async function readRequestedStdin(readStdin, stdinIsTTY, sourceLabel) {
  if (stdinIsTTY === true) throw new Error(`${sourceLabel} requires piped stdin`);
  try {
    return await readStdin();
  } catch (err) {
    // Same wrapping style as the --message-file read failure so every source
    // reports which flag was responsible.
    throw new Error(`${sourceLabel}: could not read stdin: ${err.message}`, { cause: err });
  }
}

/**
 * Resolve the prompt text from exactly one of the supported sources.
 *
 * Precedence: --message, --message-file (with '-' meaning stdin), --message-env,
 * --message-stdin, and finally an implicit read of piped stdin when no explicit
 * source was given and stdin is not a TTY.
 *
 * @param {object} [options]
 * @param {*} [options.message] - Value of --message.
 * @param {*} [options.messageFile] - Value of --message-file; '-' reads stdin.
 * @param {*} [options.messageEnv] - Name of the environment variable to read.
 * @param {boolean|string} [options.messageStdin] - true or 'true' to read stdin.
 * @param {boolean} [options.stdinIsTTY] - Defaults to `isatty(0)`.
 * @param {object} [options.env] - Environment map; defaults to `process.env`.
 * @param {Function} [options.readFile] - File reader (sync or async), injectable for tests.
 * @param {Function} [options.readStdin] - Stdin reader (sync or async), injectable for tests.
 * @returns {Promise<string|null>} The message text, or null when no source produced one.
 * @throws {Error} On a valueless flag, more than one explicit source, an unreadable
 *   file/stdin, an unset environment variable, or an explicit stdin source on a TTY.
 */
export async function resolveMessageInput({
  message = null,
  messageFile = null,
  messageEnv = null,
  messageStdin = false,
  stdinIsTTY = isatty(0),
  env = process.env,
  readFile = (path) => readFileSync(path, 'utf8'),
  readStdin = () => readFileSync(0, 'utf8'),
} = {}) {
  const directMessage = normalizeFlagValue(message, '--message');
  const filePath = normalizeFlagValue(messageFile, '--message-file');
  const envVar = normalizeFlagValue(messageEnv, '--message-env');
  const wantsStdin = messageStdin === true || messageStdin === 'true';

  const explicitSources = [
    directMessage !== null && '--message',
    filePath !== null && '--message-file',
    envVar !== null && '--message-env',
    wantsStdin && '--message-stdin',
  ].filter(Boolean);

  if (explicitSources.length > 1) {
    throw new Error(`choose only one of ${explicitSources.join(', ')} for the prompt source`);
  }

  if (directMessage !== null) return directMessage;

  if (filePath !== null) {
    if (filePath === '-') {
      return readRequestedStdin(readStdin, stdinIsTTY, '--message-file -');
    }
    try {
      return await readFile(filePath);
    } catch (err) {
      throw new Error(`--message-file: could not read file: ${err.message}`, { cause: err });
    }
  }

  if (envVar !== null) {
    if (!Object.hasOwn(env, envVar)) {
      throw new Error(`--message-env: environment variable ${envVar} is not set`);
    }
    return String(env[envVar] ?? '');
  }

  if (wantsStdin) {
    return readRequestedStdin(readStdin, stdinIsTTY, '--message-stdin');
  }

  if (stdinIsTTY === true) return null;

  // Implicit probe: nobody asked for stdin, so "nothing readable is attached"
  // must mean "no message" rather than a crash. Any other failure still surfaces.
  let pipedText;
  try {
    pipedText = await readStdin();
  } catch (err) {
    if (err?.code === 'EAGAIN' || err?.code === 'EOF') return null;
    throw err;
  }
  return pipedText.length > 0 ? pipedText : null;
}
@@ -31,7 +31,11 @@ import { readFileSync, writeFileSync, renameSync, statSync } from 'fs';
31
31
  import { dirname, join } from 'path';
32
32
  import { homedir } from 'os';
33
33
  import { fileURLToPath } from 'url';
34
- import { resolveCompletionDelivery } from './completion.mjs';
34
+ import {
35
+ extractTerminalAssistantReplyFromEntries,
36
+ hasCompletionSignal,
37
+ resolveCompletionDelivery,
38
+ } from './completion.mjs';
35
39
  import { sendMessage } from '../messages.js';
36
40
 
37
41
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -586,6 +590,28 @@ function readJsonlLastLines(sessionId, agentDir = 'main', n = 3) {
586
590
  }
587
591
  }
588
592
 
593
/**
 * Fetch the trailing entries of a session's JSONL transcript.
 *
 * Thin wrapper over readJsonlLastLines that only changes the default window
 * size to 200 entries.
 *
 * @param {string} sessionId - Internal session UUID
 * @param {string} agentDir - Agent directory (default: 'main')
 * @param {number} n - Maximum number of trailing entries to request (default: 200)
 * @returns {*} Whatever readJsonlLastLines returns for the same arguments
 */
function readJsonlTailEntries(sessionId, agentDir = 'main', n = 200) {
  const tailEntries = readJsonlLastLines(sessionId, agentDir, n);
  return tailEntries;
}
596
+
597
/**
 * Extract the terminal assistant reply from the tail of a session transcript.
 *
 * @param {string} sessionId - Internal session UUID
 * @param {string} agentDir - Agent directory (default: 'main')
 * @returns {*} Result of extractTerminalAssistantReplyFromEntries over the last
 *   200 transcript entries, or null when no entries could be read.
 */
function getSessionTerminalReply(sessionId, agentDir = 'main') {
  const tailWindow = 200;
  const entries = readJsonlTailEntries(sessionId, agentDir, tailWindow);
  // Other readers of the JSONL tail in this file treat a missing/empty tail as
  // "nothing to inspect"; do the same here instead of handing a non-array to
  // the extractor.
  if (!Array.isArray(entries) || entries.length === 0) return null;
  return extractTerminalAssistantReplyFromEntries(entries);
}
601
+
602
/**
 * Build the "last assistant report" suffix appended to failure notices.
 *
 * @param {*} reply - Candidate reply text; anything that is not a non-blank string is ignored.
 * @returns {string} Empty string when there is nothing to show, otherwise a
 *   block starting with two newlines, a heading, and the trimmed reply clipped
 *   to at most 1200 UTF-16 code units (with a truncation marker when clipped).
 */
function formatDiagnosticSnippet(reply) {
  if (typeof reply !== 'string') return '';
  const normalized = reply.trim();
  if (!normalized) return '';

  const maxLen = 1200;
  let clipped = normalized;
  if (normalized.length > maxLen) {
    let end = maxLen;
    // Never cut between the two halves of a surrogate pair: a dangling high
    // surrogate renders as a replacement character and is invalid when encoded.
    const lastUnit = normalized.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
    clipped = `${normalized.slice(0, end)}\n\n..[truncated]`;
  }

  return `\n\nLast assistant report observed:\n${clipped}`;
}
614
+
589
615
  /**
590
616
  * Check if a session is currently mid-turn by inspecting its JSONL tail.
591
617
  * Returns a reason string if mid-turn is detected, null if safe to proceed.
@@ -658,6 +684,112 @@ function getJsonlMidTurnReason(sessionId, agentDir = 'main') {
658
684
  return null; // Last assistant entry appears to be a complete text reply -- safe to proceed
659
685
  }
660
686
 
687
/**
 * Inspect the final JSONL entry of a session for an in-flight tool exchange,
 * regardless of how recently the transcript file was written. A long tool call
 * can leave the transcript untouched for minutes, so a stale mtime by itself
 * does not prove the agent is stuck.
 *
 * @param {string} sessionId - Internal session UUID
 * @param {string} agentDir - Agent directory (default: 'main')
 * @returns {string|null} reason string if a tool handoff appears pending, else null
 */
function getJsonlPendingToolReason(sessionId, agentDir = 'main') {
  const tail = readJsonlLastLines(sessionId, agentDir, 3);
  if (!tail || tail.length === 0) return null;

  // Only the very last entry decides the outcome.
  const finalEntry = tail[tail.length - 1];
  const blocks = Array.isArray(finalEntry?.content) ? finalEntry.content : [];
  const toolResultReason = 'last entry is tool_result (tool executed, awaiting assistant reply)';

  switch (finalEntry?.role) {
    case 'assistant': {
      const pendingCall = blocks.find((block) => block?.type === 'tool_use');
      if (pendingCall) {
        return `last assistant entry has tool_use (${pendingCall.name || 'unknown'}) -- awaiting tool result`;
      }
      if (finalEntry.type === 'tool_use') {
        return `last entry is tool_use (${finalEntry.name || 'unknown'}) -- awaiting tool result`;
      }
      break;
    }
    case 'user': {
      if (blocks.some((block) => block?.type === 'tool_result')) return toolResultReason;
      break;
    }
    default:
      break;
  }

  // Entry-level tool_result is recognized whatever the role is.
  return finalEntry?.type === 'tool_result' ? toolResultReason : null;
}
726
+
727
/**
 * Convert a timestamp-like value to epoch milliseconds.
 *
 * @param {number|string|Date|null|undefined} value - Epoch number (seconds or
 *   milliseconds), a Date, or a string understood by Date.parse.
 * @returns {number|null} Epoch milliseconds, or null when the value is falsy
 *   (including 0), non-finite, or unparseable.
 */
function parseTimestampMs(value) {
  if (!value) return null;

  if (typeof value === 'number') {
    if (!Number.isFinite(value)) return null;
    // Session store timestamps may be epoch seconds or epoch milliseconds
    // depending on the agent framework version. A positive value below 1e12
    // would be before Sep 2001 if read as milliseconds, so treat it as seconds.
    const secondsCeiling = 1e12;
    return value > 0 && value < secondsCeiling ? value * 1000 : value;
  }

  if (value instanceof Date) {
    const timestamp = value.getTime();
    return Number.isFinite(timestamp) ? timestamp : null;
  }

  const parsed = Date.parse(value);
  return Number.isFinite(parsed) ? parsed : null;
}
739
+
740
/**
 * Detect an agent session that has stopped making progress even though the
 * watcher process itself is still alive and writing lastPing.
 *
 * This closes the failure mode where OpenClaw's Codex app-server retires a
 * timed-out turn, but dispatch status keeps reporting "running" because the
 * delivery watcher is still polling.
 *
 * @param {object} status - Dispatch status payload; reads sessionKey, label and liveness.ageMs.
 * @param {number} thresholdMs - Idle time after which the session counts as stalled.
 * @returns {string|null} Human-readable stall reason, or null when the session
 *   is unknown, recently active, or waiting on a pending tool handoff.
 */
function getRunningSessionStallReason(status, thresholdMs) {
  if (!status?.sessionKey) return null;

  // Cheapest signal first: fresh liveness telemetry settles the question
  // without touching the session store or stat-ing the transcript.
  const livenessAgeMs = status.liveness?.ageMs;
  if (typeof livenessAgeMs === 'number' && livenessAgeMs < thresholdMs) {
    return null;
  }

  const entry = getSessionStoreEntry(status.sessionKey);
  if (!entry) return null;

  const sessionAgent = status.sessionKey.split(':')[1] || 'main';
  const sessionId = entry.sessionId || null;
  const now = Date.now();

  const activityTimes = [
    entry.updatedAt,
    entry.lastActivityAt,
    entry.sessionStartedAt,
    entry.startedAt,
  ]
    .map((raw) => parseTimestampMs(raw))
    .filter((t) => typeof t === 'number');

  const jsonlMtime = sessionId ? getSessionJsonlMtime(sessionId, sessionAgent) : null;
  if (typeof jsonlMtime === 'number') activityTimes.push(jsonlMtime);

  const lastActivityMs = activityTimes.length ? Math.max(...activityTimes) : null;
  if (lastActivityMs !== null && now - lastActivityMs < thresholdMs) {
    return null;
  }

  // A tool call in flight explains a quiet transcript; do not call that a stall.
  const pendingToolReason = sessionId ? getJsonlPendingToolReason(sessionId, sessionAgent) : null;
  if (pendingToolReason) {
    process.stderr.write(
      `[watcher] ${status.label || 'session'} stale telemetry but pending tool handoff detected: ${pendingToolReason}\n`
    );
    return null;
  }

  // With no activity timestamp at all, report the threshold itself as the idle time.
  const idleMinutes = lastActivityMs === null
    ? Math.ceil(thresholdMs / 60000)
    : Math.max(1, Math.floor((now - lastActivityMs) / 60000));
  return (
    `agent session stalled: no session/jsonl activity for ~${idleMinutes}min ` +
    `while delivery watcher remained alive; likely app-server turn retired or stopped producing events`
  );
}
792
+
661
793
  /**
662
794
  * Read the last assistant entry's stop_reason from the session JSONL.
663
795
  * Returns the stop_reason string (e.g. 'end_turn', 'tool_use') or null if unavailable.
@@ -728,6 +860,7 @@ function markLabelError(label, errorSummary) {
728
860
  updateExistingLabel(label, (entry) => {
729
861
  if (entry.status === 'done') return false;
730
862
  entry.status = 'error';
863
+ entry.error = errorSummary || 'failed without result';
731
864
  entry.summary = errorSummary || 'failed without result';
732
865
  });
733
866
  } catch (e) {
@@ -735,6 +868,8 @@ function markLabelError(label, errorSummary) {
735
868
  }
736
869
  }
737
870
 
871
+ let exitZeroOnTerminal = false;
872
+
738
873
  /**
739
874
  * Format and output the delivery message, then exit 0.
740
875
  * Also marks the label as done in labels.json before exiting.
@@ -768,7 +903,7 @@ function deliverResult(label, lastReply, fallbackSummary, completionPayload = nu
768
903
  `**Error:** ${stderr || 'non-zero exit'}\n\n` +
769
904
  `Job marked as \`error\`. The agent may have reported done without completing the actual work.\n`
770
905
  );
771
- process.exit(1);
906
+ process.exit(exitZeroOnTerminal ? 0 : 1);
772
907
  }
773
908
  }
774
909
  } catch (loadErr) {
@@ -790,10 +925,32 @@ function deliverResult(label, lastReply, fallbackSummary, completionPayload = nu
790
925
  ? completion.deliveryText.slice(0, maxLen) + '\n\n..[truncated]'
791
926
  : completion.deliveryText;
792
927
  process.stdout.write(`🌶️ *dispatch* [${label}] completed:\n\n${reply}\n`);
793
- } else {
794
- process.stderr.write(`[watcher] [${label}] completion delivery suppressed (no meaningful reply or summary)\n`);
928
+ process.exit(0);
795
929
  }
796
- process.exit(0);
930
+
931
+ const failureSummary = 'completed without a clean user-facing completion';
932
+ process.stderr.write(`[watcher] [${label}] completion delivery suppressed (no meaningful reply or summary)\n`);
933
+ markLabelError(label, failureSummary);
934
+ process.stdout.write(
935
+ `⚠️ dispatch [${label}] completed, but no clean user-facing completion was captured. ` +
936
+ `Internal diagnostics were suppressed; check scheduler run logs for details.\n`
937
+ );
938
+ process.exit(exitZeroOnTerminal ? 0 : 1);
939
+ }
940
+
941
/**
 * Report a session that was auto-resolved as interrupted, then exit.
 *
 * Logs to stderr, records the label as errored, prints the user-facing warning
 * (with the last observed assistant reply appended when one exists) and exits
 * with 0 when exitZeroOnTerminal is set, otherwise 1.
 *
 * @param {string} label - Dispatch label
 * @param {string} [summary] - Error summary; a default interrupted message is used when falsy
 * @param {object|null} [result] - Result payload; diagnosticReply is preferred over lastReply
 */
function emitInterruptedOutcome(label, summary, result = null) {
  const observedReply = result?.diagnosticReply || result?.lastReply || null;
  const errorSummary = summary || 'interrupted: session went idle without calling done';

  process.stderr.write(`[watcher] [${label}] session auto-resolved as interrupted -- work may be incomplete\n`);
  markLabelError(label, errorSummary);

  const notice = `⚠️ dispatch [${label}] session went idle before completing -- work may be incomplete`;
  process.stdout.write(`${notice}${formatDiagnosticSnippet(observedReply)}\n`);

  const exitCode = exitZeroOnTerminal ? 0 : 1;
  process.exit(exitCode);
}
950
+
951
/**
 * Print a timeout notice (with the last observed assistant reply appended when
 * one exists) and exit with 0 when exitZeroOnTerminal is set, otherwise 1.
 *
 * @param {string} label - Dispatch label (not used in the output; the message already carries it)
 * @param {string} message - Fully formatted user-facing timeout line, without trailing newline
 * @param {object|null} [result] - Result payload; diagnosticReply is preferred over lastReply
 */
function emitTimeoutOutcome(label, message, result = null) {
  const observedReply = result?.diagnosticReply || result?.lastReply || null;
  const snippet = formatDiagnosticSnippet(observedReply);
  process.stdout.write(`${message}${snippet}\n`);

  const exitCode = exitZeroOnTerminal ? 0 : 1;
  process.exit(exitCode);
}
798
955
 
799
956
  // -- Watcher heartbeat interval ref --------------------------------------
@@ -828,6 +985,8 @@ const flags = parseFlags(process.argv.slice(2));
828
985
  const label = flags.label;
829
986
  const timeoutS = parseInt(flags.timeout || '600', 10);
830
987
  const pollS = parseInt(flags['poll-interval'] || '20', 10);
988
+ const once = flags.once === true || flags.once === 'true';
989
+ exitZeroOnTerminal = once;
831
990
 
832
991
  // How long a session must be idle before we proactively check result
833
992
  const IDLE_RESULT_CHECK_MS = 60000;
@@ -837,6 +996,144 @@ if (!label) {
837
996
  process.exit(2);
838
997
  }
839
998
 
999
/**
 * Refresh the label's lastPing timestamp, but only while it is still running.
 *
 * @param {string} label - Dispatch label to update
 */
function touchWatcherPing(label) {
  const stampPing = (record) => {
    if (record.status === 'running') {
      record.lastPing = new Date().toISOString();
      return undefined;
    }
    // Returning false tells updateExistingLabel nothing was changed.
    return false;
  };

  updateExistingLabel(label, stampPing);
}
1005
+
1006
/**
 * Log that the watched target has not reached a terminal state yet and end
 * this watcher process successfully (exit code 0).
 *
 * @param {string} label - Dispatch label
 * @param {string} [reason] - Why the tick is ending without an outcome
 */
function markWatcherPending(label, reason = 'target still running') {
  const logLine = `[watcher] WATCHER_PENDING label=${label} reason=${reason}\n`;
  process.stderr.write(logLine);
  process.exit(0);
}
1010
+
1011
/**
 * Remove the watcherRetryAfter marker from a label, if it has one.
 *
 * @param {string} label - Dispatch label to update
 */
function clearWatcherRetryAfter(label) {
  const dropRetryMarker = (record) => {
    if (record.watcherRetryAfter) {
      delete record.watcherRetryAfter;
      return undefined;
    }
    // Returning false tells updateExistingLabel nothing was changed.
    return false;
  };

  updateExistingLabel(label, dropRetryMarker);
}
1017
+
1018
/**
 * Handle a 529 (overload) error for a single --once watcher tick.
 *
 * Every path ends the process: either a terminal failure is reported, or the
 * tick ends as "pending" so a later tick can continue the retry. The steps
 * below rely on process.exit (directly or via markWatcherPending) never
 * returning, so their order matters.
 *
 * @param {string} label - Dispatch label
 * @param {string} errorMsg - The 529 error text taken from the status payload
 */
function handleOnce529(label, errorMsg) {
  const labelRecord = loadLabels()[label] || {};
  const retryCount = getRetryCount(label);

  // Shared terminal path: record the error, tell the user, exit 0.
  const failAndExit = (errorSummary, headline) => {
    markLabelError(label, errorSummary);
    process.stdout.write(`${headline}\nError: ${errorMsg}\n`);
    process.exit(0);
  };

  if (retryCount >= MAX_529_RETRIES) {
    failAndExit(
      `max_retries_exceeded (${retryCount}x 529): ${errorMsg}`,
      `🌶️ *dispatch* [${label}] failed after ${MAX_529_RETRIES} retries (529 overload)`
    );
  }

  const retryAfterMs = parseTimestampMs(labelRecord.watcherRetryAfter);

  // No backoff deadline recorded yet: schedule one and leave the respawn to a later tick.
  if (!retryAfterMs) {
    const retryResult = attempt529Retry(label, retryCount, errorMsg);
    if (!retryResult.retry) return handleOnce529(label, errorMsg);

    const { delayMs } = retryResult;
    updateExistingLabel(label, (current) => {
      current.watcherRetryAfter = new Date(Date.now() + delayMs).toISOString();
    });
    markWatcherPending(label, `529 retry scheduled for future tick (${delayMs / 1000}s)`);
  }

  // Backoff deadline exists but has not passed yet.
  if (Date.now() < retryAfterMs) {
    markWatcherPending(label, '529 retry backoff active');
  }

  // Backoff elapsed: try to respawn now.
  const respawned = respawnSession(label);
  if (respawned) {
    clearWatcherRetryAfter(label);
    markWatcherPending(label, '529 retry dispatched');
  }

  failAndExit(
    `529 retry failed -- could not respawn session: ${errorMsg}`,
    `🌶️ *dispatch* [${label}] 529 retry failed -- could not respawn session`
  );
}
1058
+
1059
/**
 * Perform one watcher poll for the module-level label and exit.
 *
 * Order of checks: refresh the heartbeat, fetch status, route 529 errors to
 * handleOnce529, report terminal states, deliver early on a cleanly finished
 * transcript, then (once the session has been idle long enough) deliver a
 * captured result or report a stall. If none of these applies the tick ends as
 * pending. Each outcome helper ends the process, so statement order matters.
 */
function runOnceAndExit() {
  // Fresh argument array per dispatch call, exactly like separate literals.
  const labelArgs = () => ['--label', label];

  try {
    touchWatcherPing(label);
  } catch {
    // Best-effort -- a quick-poll tick must not fail because heartbeat metadata raced.
  }

  const status = dispatch('status', labelArgs());
  if (!status?.ok) {
    markWatcherPending(label, 'status unavailable');
  }

  const phase = status.status;

  if (phase === 'error') {
    const failureText = status.error || status.summary || '';
    if (is529Error(failureText)) {
      handleOnce529(label, failureText);
    }
  }

  if (phase !== 'running') {
    const terminalResult = dispatch('result', labelArgs());
    const terminalCompletion = terminalResult?.completion || status?.completion || null;

    if (phase === 'done') {
      // A successful finish clears both retry counters before delivery.
      if (getRetryCount(label) > 0) setRetryCount(label, 0);
      if (getGwRestartRetryCount(label) > 0) setGwRestartRetryCount(label, 0);
      deliverResult(label, terminalResult?.lastReply, status.summary, terminalCompletion);
    }

    if (phase === 'interrupted') {
      emitInterruptedOutcome(label, status.summary, terminalResult);
    }

    const failureSummary = status.error || status.summary || `terminal failure (${phase || 'unknown'})`;
    markLabelError(label, failureSummary);
    process.stdout.write(`🌶️ *dispatch* [${label}] failed\nSummary: ${failureSummary}\n`);
    process.exit(0);
  }

  const { sessionKey } = status;
  if (sessionKey) {
    const storeEntry = getSessionStoreEntry(sessionKey);
    const sessionId = storeEntry?.sessionId || null;
    const agentDir = sessionKey.split(':')[1] || 'main';
    const jsonlReply = sessionId ? getSessionTerminalReply(sessionId, agentDir) : null;

    if (sessionId && jsonlReply && isSessionCleanlyFinished(sessionId, agentDir)) {
      const earlyResult = dispatch('result', labelArgs());
      deliverResult(
        label,
        earlyResult?.lastReply || jsonlReply,
        'completed (stop_reason=end_turn)',
        earlyResult?.completion || null
      );
    }
  }

  const idleAgeMs = status.liveness?.ageMs;
  if (idleAgeMs != null && idleAgeMs >= IDLE_RESULT_CHECK_MS) {
    const idleResult = dispatch('result', labelArgs());
    if (idleResult?.lastReply || hasCompletionSignal(idleResult?.completion)) {
      deliverResult(label, idleResult?.lastReply || null, null, idleResult?.completion || null);
    }

    const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
    if (stallReason) {
      process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
      markLabelError(label, stallReason);
      process.stdout.write(`❌ *dispatch* [${label}] failed\nSummary: ${stallReason}\n`);
      process.exit(0);
    }
  }

  markWatcherPending(label);
}
1132
+
1133
+ if (once) {
1134
+ runOnceAndExit();
1135
+ }
1136
+
840
1137
  // -- Start heartbeat -----------------------------------------------------
841
1138
  // Write lastPing to labels.json every PING_INTERVAL_MS while the session is
842
1139
  // still running. The watchdog guard in index.mjs reads lastPing to know this
@@ -870,17 +1167,47 @@ let lastKnownReply = null;
870
1167
  let lastKnownCompletion = null;
871
1168
 
872
1169
  // -- SIGTERM handler (scheduler kills watcher with SIGTERM before SIGKILL) --
873
- // Ensures labels.json is updated and a delivery attempt is made even when killed.
1170
+ // Hand off to a fresh watcher instead of converting the kill into a fake success.
874
1171
  process.on('SIGTERM', () => {
875
- process.stderr.write(`[watcher] SIGTERM received for ${label} -- marking as interrupted\n`);
876
- // Try to fetch the latest result before dying
1172
+ process.stderr.write(`[watcher] SIGTERM received for ${label} -- attempting watcher handoff\n`);
1173
+
1174
+ let latestStatus = null;
1175
+ try {
1176
+ latestStatus = dispatch('status', ['--label', label]);
1177
+ } catch {}
1178
+
877
1179
  try {
878
1180
  const result = dispatch('result', ['--label', label]);
879
1181
  if (result?.lastReply) lastKnownReply = result.lastReply;
880
1182
  if (result?.completion) lastKnownCompletion = result.completion;
881
1183
  } catch {}
882
- // deliverResult calls process.exit(0) internally
883
- deliverResult(label, lastKnownReply, 'interrupted by watcher timeout', lastKnownCompletion);
1184
+
1185
+ if (latestStatus?.status === 'done') {
1186
+ deliverResult(label, lastKnownReply, latestStatus.summary || null, lastKnownCompletion || latestStatus?.completion || null);
1187
+ }
1188
+
1189
+ if (latestStatus?.status === 'interrupted') {
1190
+ markLabelError(label, latestStatus.summary || 'interrupted: session went idle without calling done');
1191
+ process.exit(1);
1192
+ }
1193
+
1194
+ if (latestStatus?.status && latestStatus.status !== 'running') {
1195
+ const summary = latestStatus.error || latestStatus.summary || `terminal failure (${latestStatus.status})`;
1196
+ markLabelError(label, summary);
1197
+ process.stdout.write(`🌶️ *dispatch* [${label}] failed\nSummary: ${summary}\n`);
1198
+ process.exit(1);
1199
+ }
1200
+
1201
+ const handoff = dispatch('watcher-handoff', ['--label', label, '--reason', 'sigterm']);
1202
+ if (handoff?.ok && (handoff.scheduled || handoff.reason === 'label already terminal' || handoff.reason === 'delivery disabled for this label')) {
1203
+ process.stderr.write(`[watcher] SIGTERM handoff ${handoff.scheduled ? 'scheduled' : 'skipped'} for ${label}\n`);
1204
+ process.exit(0);
1205
+ }
1206
+
1207
+ const failureSummary = 'interrupted by watcher timeout (handoff failed)';
1208
+ markLabelError(label, failureSummary);
1209
+ process.stdout.write(`⚠️ dispatch [${label}] watcher interrupted and handoff failed\nSummary: ${failureSummary}\n`);
1210
+ process.exit(1);
884
1211
  });
885
1212
 
886
1213
  // -- Rolling deadline vars ------------------------------------
@@ -1024,11 +1351,21 @@ while (Date.now() < deadline) {
1024
1351
 
1025
1352
  // -- Path 1: status auto-resolved to done ------------------
1026
1353
  if (status.status !== 'running') {
1354
+ const terminalResult = dispatch('result', ['--label', label]);
1355
+ const terminalCompletion = terminalResult?.completion || status?.completion || null;
1356
+ const hasTerminalCompletionEvidence = Boolean(
1357
+ terminalResult?.lastReply
1358
+ || terminalResult?.completion?.deliveryText
1359
+ || terminalResult?.completion?.summary
1360
+ || status?.completion?.deliveryText
1361
+ || status?.completion?.summary
1362
+ );
1363
+
1027
1364
  // -- Spawn failure detection -----------------------------------------
1028
1365
  // If the session was auto-resolved to 'done' (or 'spawn-warning') but was
1029
- // never seen in the gateway, it never ran -- this is a spawn failure.
1030
- // Causes: auth timeout, quota exhaustion, gateway error at spawn time.
1031
- if (!sessionEverFound && (status.status === 'done' || status.status === 'spawn-warning' || status.status === 'error')) {
1366
+ // never seen in the gateway, it never ran -- unless a terminal completion
1367
+ // payload/reply proves the work already finished before this watcher saw it.
1368
+ if (!sessionEverFound && (status.status === 'spawn-warning' || status.status === 'error' || (status.status === 'done' && !hasTerminalCompletionEvidence))) {
1032
1369
  const spawnErrMsg =
1033
1370
  `[dispatch] SPAWN FAILURE: session ${status.sessionKey || '(unknown)'} never appeared ` +
1034
1371
  `in gateway -- spawn likely failed (auth timeout, quota, or gateway error). Label: ${label}`;
@@ -1055,7 +1392,7 @@ while (Date.now() < deadline) {
1055
1392
  // If the session DID produce a lastReply before being killed, deliver it normally.
1056
1393
  if (sessionEverFound && isGatewayRestartKill(status.summary)) {
1057
1394
  const gwCheckResult = dispatch('result', ['--label', label]);
1058
- if (!gwCheckResult?.lastReply && !gwCheckResult?.completion?.deliveryText) {
1395
+ if (!gwCheckResult?.lastReply && !hasCompletionSignal(gwCheckResult?.completion)) {
1059
1396
  // No result captured -- session was killed before completing
1060
1397
  const retryCount = getGwRestartRetryCount(label);
1061
1398
  if (retryCount >= MAX_GW_RESTART_RETRIES) {
@@ -1113,12 +1450,8 @@ while (Date.now() < deadline) {
1113
1450
  //
1114
1451
  // NOTE: Always resolve as 'interrupted', never 'done'. Only agent-side cmdDone may set status=done.
1115
1452
  if (status.status === 'interrupted') {
1116
- process.stderr.write(`[watcher] [${label}] session auto-resolved as interrupted -- work may be incomplete\n`);
1117
- process.stdout.write(
1118
- `⚠️ dispatch [${label}] session went idle before completing -- work may be incomplete\n`
1119
- );
1120
- markLabelError(label, status.summary || 'interrupted: session went idle without calling done');
1121
- process.exit(1);
1453
+ const interruptedResult = dispatch('result', ['--label', label]);
1454
+ emitInterruptedOutcome(label, status.summary, interruptedResult);
1122
1455
  }
1123
1456
 
1124
1457
  // Reset 529 retryCount on successful completion
@@ -1129,8 +1462,7 @@ while (Date.now() < deadline) {
1129
1462
  process.stderr.write(`[watcher] [${label}] completed after ${currentRetryCount} retry(ies), reset retryCount\n`);
1130
1463
  }
1131
1464
  }
1132
- const result = dispatch('result', ['--label', label]);
1133
- deliverResult(label, result?.lastReply, status.summary, result?.completion || status?.completion || null);
1465
+ deliverResult(label, terminalResult?.lastReply, status.summary, terminalCompletion);
1134
1466
  }
1135
1467
 
1136
1468
  // -- Path 2a: stop_reason early delivery (clean end_turn) --
@@ -1141,10 +1473,11 @@ while (Date.now() < deadline) {
1141
1473
  const _e2a = getSessionStoreEntry(status.sessionKey);
1142
1474
  const _sid2a = _e2a?.sessionId || null;
1143
1475
  const _adir2a = (status.sessionKey.split(':')[1]) || 'main';
1144
- if (_sid2a && isSessionCleanlyFinished(_sid2a, _adir2a)) {
1476
+ const terminalJsonlReply = _sid2a ? getSessionTerminalReply(_sid2a, _adir2a) : null;
1477
+ if (_sid2a && terminalJsonlReply && isSessionCleanlyFinished(_sid2a, _adir2a)) {
1145
1478
  process.stderr.write(`[watcher] stop_reason=end_turn detected -- delivering early\n`);
1146
1479
  const result = dispatch('result', ['--label', label]);
1147
- deliverResult(label, result?.lastReply, 'completed (stop_reason=end_turn)', result?.completion || null);
1480
+ deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
1148
1481
  // deliverResult exits
1149
1482
  }
1150
1483
  }
@@ -1158,9 +1491,20 @@ while (Date.now() < deadline) {
1158
1491
  const ageMs = status.liveness?.ageMs;
1159
1492
  if (ageMs != null && ageMs >= IDLE_RESULT_CHECK_MS) {
1160
1493
  const result = dispatch('result', ['--label', label]);
1161
- if (result?.lastReply || result?.completion?.deliveryText) {
1494
+ if (result?.lastReply || hasCompletionSignal(result?.completion)) {
1162
1495
  deliverResult(label, result?.lastReply || null, null, result?.completion || null);
1163
1496
  }
1497
+
1498
+ const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
1499
+ if (stallReason) {
1500
+ process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
1501
+ markLabelError(label, stallReason);
1502
+ process.stdout.write(
1503
+ `❌ *dispatch* [${label}] failed\n` +
1504
+ `Summary: ${stallReason}\n`
1505
+ );
1506
+ process.exit(1);
1507
+ }
1164
1508
  }
1165
1509
 
1166
1510
 
@@ -1183,11 +1527,7 @@ if (finalStatus?.status === 'done') {
1183
1527
  // If status is interrupted (auto-resolved as incomplete), exit non-zero
1184
1528
  if (finalStatus?.status === 'interrupted') {
1185
1529
  process.stderr.write(`[watcher] [${label}] final status=interrupted -- session idle without completion\n`);
1186
- process.stdout.write(
1187
- `⚠️ dispatch [${label}] session went idle before completing -- work may be incomplete\n`
1188
- );
1189
- markLabelError(label, finalStatus?.summary || 'interrupted: session went idle without calling done');
1190
- process.exit(1);
1530
+ emitInterruptedOutcome(label, finalStatus?.summary, finalResult);
1191
1531
  }
1192
1532
 
1193
1533
  // -- Token-based activity check before steering ----------------------------
@@ -1237,7 +1577,7 @@ if (sessionInternalId) {
1237
1577
  // If the session already completed (gateway pruned it -> null tokens), exit cleanly.
1238
1578
  if (statusAtDeadline?.status === 'done' || baselineTokens === null) {
1239
1579
  const r = dispatch('result', ['--label', label]);
1240
- if (r?.lastReply || r?.completion?.deliveryText) {
1580
+ if (r?.lastReply || hasCompletionSignal(r?.completion)) {
1241
1581
  // deliverResult calls process.exit(0) internally
1242
1582
  deliverResult(label, r?.lastReply || null, statusAtDeadline?.summary || null, r?.completion || null);
1243
1583
  }
@@ -1255,8 +1595,7 @@ if (statusAtDeadline?.status === 'done' || baselineTokens === null) {
1255
1595
  // Session truly not found -- telemetry unavailable, exit
1256
1596
  process.stderr.write(`[watcher] token telemetry unavailable for ${label}; session not in store\n`);
1257
1597
  markLabelError(label, `timed out after ${timeoutS}s -- token telemetry unavailable`);
1258
- process.stdout.write(`⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry unavailable; no steer/kill attempted\n`);
1259
- process.exit(1);
1598
+ emitTimeoutOutcome(label, `⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry unavailable; no steer/kill attempted`, r);
1260
1599
  }
1261
1600
  // Session IS in store but no tokens -- mid-tool-call, fall through to activity window
1262
1601
  // Use updatedAt as activity signal instead of tokens
@@ -1277,7 +1616,7 @@ while (Date.now() - flatSince < FLAT_WINDOW_MS) {
1277
1616
  deliverResult(label, r?.lastReply || null, st.summary, r?.completion || st?.completion || null);
1278
1617
  }
1279
1618
  const r2 = dispatch('result', ['--label', label]);
1280
- if (r2?.lastReply || r2?.completion?.deliveryText) {
1619
+ if (r2?.lastReply || hasCompletionSignal(r2?.completion)) {
1281
1620
  // deliverResult calls process.exit(0) internally
1282
1621
  deliverResult(label, r2?.lastReply || null, null, r2?.completion || null);
1283
1622
  }
@@ -1290,8 +1629,8 @@ while (Date.now() - flatSince < FLAT_WINDOW_MS) {
1290
1629
  if (!entry) {
1291
1630
  process.stderr.write(`[watcher] token telemetry lost for ${label}; session gone from store\n`);
1292
1631
  markLabelError(label, `timed out after ${timeoutS}s -- token telemetry lost`);
1293
- process.stdout.write(`⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry lost; no steer/kill attempted\n`);
1294
- process.exit(1);
1632
+ const tokenLostResult = dispatch('result', ['--label', label]);
1633
+ emitTimeoutOutcome(label, `⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry lost; no steer/kill attempted`, tokenLostResult);
1295
1634
  }
1296
1635
  // Still in store -- check if updatedAt advanced (tool call still running)
1297
1636
  // Normalize: updatedAt may be seconds or milliseconds depending on agent framework version
@@ -1371,7 +1710,7 @@ if (sessionInternalId) {
1371
1710
  deliverResult(label, rExt?.lastReply || null, stExt.summary, rExt?.completion || stExt?.completion || null);
1372
1711
  }
1373
1712
  const rExt2 = dispatch('result', ['--label', label]);
1374
- if (rExt2?.lastReply || rExt2?.completion?.deliveryText) {
1713
+ if (rExt2?.lastReply || hasCompletionSignal(rExt2?.completion)) {
1375
1714
  // deliverResult calls process.exit(0) internally
1376
1715
  deliverResult(label, rExt2?.lastReply || null, null, rExt2?.completion || null);
1377
1716
  }
@@ -1428,7 +1767,7 @@ for (const round of steerRounds) {
1428
1767
  deliverResult(label, r3?.lastReply || null, st2.summary, r3?.completion || st2?.completion || null);
1429
1768
  }
1430
1769
  const r3 = dispatch('result', ['--label', label]);
1431
- if (r3?.lastReply || r3?.completion?.deliveryText) {
1770
+ if (r3?.lastReply || hasCompletionSignal(r3?.completion)) {
1432
1771
  // deliverResult calls process.exit(0) internally
1433
1772
  deliverResult(label, r3?.lastReply || null, null, r3?.completion || null);
1434
1773
  }
@@ -1443,17 +1782,16 @@ for (const round of steerRounds) {
1443
1782
  if (st3?.status === 'done') {
1444
1783
  // Check if a result was captured before marking as error
1445
1784
  const r4 = dispatch('result', ['--label', label]);
1446
- if (r4?.lastReply || r4?.completion?.deliveryText) {
1785
+ if (r4?.lastReply || hasCompletionSignal(r4?.completion)) {
1447
1786
  deliverResult(label, r4?.lastReply || null, st3.summary, r4?.completion || st3?.completion || null); // deliverResult calls process.exit(0)
1448
1787
  }
1449
1788
  markLabelError(label, 'timed out -- killed after steer attempts (no result captured)');
1450
- process.stdout.write(`⏱ dispatch [${label}] killed after steer attempts -- no result captured\n`);
1451
- process.exit(1);
1789
+ emitTimeoutOutcome(label, `⏱ dispatch [${label}] killed after steer attempts -- no result captured`, r4);
1452
1790
  }
1453
1791
  }
1454
1792
  }
1455
1793
  }
1456
1794
 
1457
1795
  markLabelError(label, `timed out after ${timeoutS}s -- killed after steer attempts`);
1458
- process.stdout.write(`⏱ dispatch [${label}] timed out after ${timeoutS}s -- session killed after steer attempts\n`);
1459
- process.exit(1);
1796
+ const timeoutResult = dispatch('result', ['--label', label]);
1797
+ emitTimeoutOutcome(label, `⏱ dispatch [${label}] timed out after ${timeoutS}s -- session killed after steer attempts`, timeoutResult);