npm - openclaw-scheduler - Versions diffs - 0.2.4 → 0.2.6 - Mend

openclaw-scheduler 0.2.4 → 0.2.6

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (16)

package/CHANGELOG.md +14 -0
package/README.md +16 -6
package/cli.js +13 -4
package/dispatch/README.md +18 -3
package/dispatch/completion.mjs +1312 -34
package/dispatch/hooks.mjs +17 -5
package/dispatch/index.mjs +600 -226
package/dispatch/message-input.mjs +67 -0
package/dispatch/watcher.mjs +381 -43
package/dispatcher-strategies.js +203 -30
package/dispatcher.js +6 -1
package/gateway.js +71 -8
package/index.d.ts +1 -0
package/package.json +3 -1
package/scripts/dispatch-cli-utils.mjs +53 -0
package/scripts/inbox-watcher-guardrail.mjs +506 -0

package/dispatch/message-input.mjs ADDED Viewed

@@ -0,0 +1,67 @@
+import { readFileSync } from 'fs';
+import { isatty } from 'node:tty';
+function normalizeFlagValue(value, flagName) {
+  if (value === undefined || value === null) return null;
+  if (value === true) throw new Error(`${flagName} requires a value`);
+  return String(value);
+}
+export async function resolveMessageInput({
+  message = null,
+  messageFile = null,
+  messageEnv = null,
+  messageStdin = false,
+  stdinIsTTY = isatty(0),
+  env = process.env,
+  readFile = (path) => readFileSync(path, 'utf8'),
+  readStdin = () => readFileSync(0, 'utf8'),
+} = {}) {
+  const directMessage = normalizeFlagValue(message, '--message');
+  const filePath = normalizeFlagValue(messageFile, '--message-file');
+  const envVar = normalizeFlagValue(messageEnv, '--message-env');
+  const wantsStdin = messageStdin === true || messageStdin === 'true';
+  const explicitSources = [];
+  if (directMessage !== null) explicitSources.push('--message');
+  if (filePath !== null) explicitSources.push('--message-file');
+  if (envVar !== null) explicitSources.push('--message-env');
+  if (wantsStdin) explicitSources.push('--message-stdin');
+  if (explicitSources.length > 1) {
+    throw new Error(`choose only one of ${explicitSources.join(', ')} for the prompt source`);
+  }
+  if (directMessage !== null) return directMessage;
+  if (filePath !== null) {
+    if (filePath === '-') {
+      if (stdinIsTTY === true) throw new Error('--message-file - requires piped stdin');
+      return readStdin();
+    }
+    try {
+      return readFile(filePath);
+    } catch (err) {
+      throw new Error(`--message-file: could not read file: ${err.message}`, { cause: err });
+    }
+  }
+  if (envVar !== null) {
+    if (!Object.prototype.hasOwnProperty.call(env, envVar)) {
+      throw new Error(`--message-env: environment variable ${envVar} is not set`);
+    }
+    return String(env[envVar] ?? '');
+  }
+  if (wantsStdin) {
+    if (stdinIsTTY === true) throw new Error('--message-stdin requires piped stdin');
+    return readStdin();
+  }
+  if (stdinIsTTY !== true) {
+    const pipedText = readStdin();
+    return pipedText.length > 0 ? pipedText : null;
+  }
+  return null;
+}

package/dispatch/watcher.mjs CHANGED Viewed

@@ -31,7 +31,11 @@ import { readFileSync, writeFileSync, renameSync, statSync } from 'fs';
 import { dirname, join } from 'path';
 import { homedir } from 'os';
 import { fileURLToPath } from 'url';
-import { resolveCompletionDelivery } from './completion.mjs';
+import {
+  extractTerminalAssistantReplyFromEntries,
+  hasCompletionSignal,
+  resolveCompletionDelivery,
+} from './completion.mjs';
 import { sendMessage } from '../messages.js';
 const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -586,6 +590,28 @@ function readJsonlLastLines(sessionId, agentDir = 'main', n = 3) {
   }
 }
+function readJsonlTailEntries(sessionId, agentDir = 'main', n = 200) {
+  return readJsonlLastLines(sessionId, agentDir, n);
+}
+function getSessionTerminalReply(sessionId, agentDir = 'main') {
+  const entries = readJsonlTailEntries(sessionId, agentDir, 200);
+  return extractTerminalAssistantReplyFromEntries(entries);
+}
+function formatDiagnosticSnippet(reply) {
+  if (!reply || typeof reply !== 'string') return '';
+  const normalized = reply.trim();
+  if (!normalized) return '';
+  const maxLen = 1200;
+  const clipped = normalized.length > maxLen
+    ? normalized.slice(0, maxLen) + '\n\n..[truncated]'
+    : normalized;
+  return `\n\nLast assistant report observed:\n${clipped}`;
+}
 /**
  * Check if a session is currently mid-turn by inspecting its JSONL tail.
  * Returns a reason string if mid-turn is detected, null if safe to proceed.
@@ -658,6 +684,112 @@ function getJsonlMidTurnReason(sessionId, agentDir = 'main') {
   return null; // Last assistant entry appears to be a complete text reply -- safe to proceed
 }
+/**
+ * Check the JSONL tail for a pending tool handoff without requiring recent
+ * file activity. Long-running tool calls can leave the transcript flat for
+ * minutes, so stale mtime alone is not enough to declare the agent stuck.
+ *
+ * @param {string} sessionId - Internal session UUID
+ * @param {string} agentDir - Agent directory (default: 'main')
+ * @returns {string|null} reason string if a tool handoff appears pending
+ */
+function getJsonlPendingToolReason(sessionId, agentDir = 'main') {
+  const lastLines = readJsonlLastLines(sessionId, agentDir, 3);
+  if (!lastLines || lastLines.length === 0) return null;
+  const last = lastLines[lastLines.length - 1];
+  if (last?.role === 'assistant') {
+    const content = Array.isArray(last.content) ? last.content : [];
+    const toolUse = content.find(c => c?.type === 'tool_use');
+    if (toolUse) {
+      return `last assistant entry has tool_use (${toolUse.name || 'unknown'}) -- awaiting tool result`;
+    }
+    if (last.type === 'tool_use') {
+      return `last entry is tool_use (${last.name || 'unknown'}) -- awaiting tool result`;
+    }
+  }
+  if (last?.role === 'user') {
+    const content = Array.isArray(last.content) ? last.content : [];
+    if (content.some(c => c?.type === 'tool_result')) {
+      return 'last entry is tool_result (tool executed, awaiting assistant reply)';
+    }
+  }
+  if (last?.type === 'tool_result') {
+    return 'last entry is tool_result (tool executed, awaiting assistant reply)';
+  }
+  return null;
+}
+function parseTimestampMs(value) {
+  if (!value) return null;
+  if (typeof value === 'number') {
+    return Number.isFinite(value) ? value : null;
+  }
+  if (value instanceof Date) {
+    const timestamp = value.getTime();
+    return Number.isFinite(timestamp) ? timestamp : null;
+  }
+  const parsed = Date.parse(value);
+  return Number.isFinite(parsed) ? parsed : null;
+}
+/**
+ * Detect an agent session that has stopped making progress even though the
+ * watcher process itself is still alive and writing lastPing.
+ *
+ * This closes the failure mode where OpenClaw's Codex app-server retires a
+ * timed-out turn, but dispatch status keeps reporting "running" because the
+ * delivery watcher is still polling.
+ */
+function getRunningSessionStallReason(status, thresholdMs) {
+  if (!status?.sessionKey) return null;
+  const sessionAgent = status.sessionKey.split(':')[1] || 'main';
+  const entry = getSessionStoreEntry(status.sessionKey);
+  if (!entry) return null;
+  const sessionId = entry.sessionId || null;
+  const now = Date.now();
+  const activityTimes = [
+    parseTimestampMs(entry.updatedAt),
+    parseTimestampMs(entry.lastActivityAt),
+    parseTimestampMs(entry.sessionStartedAt),
+    parseTimestampMs(entry.startedAt),
+  ].filter(t => typeof t === 'number');
+  const jsonlMtime = sessionId ? getSessionJsonlMtime(sessionId, sessionAgent) : null;
+  if (typeof jsonlMtime === 'number') activityTimes.push(jsonlMtime);
+  if (typeof status?.liveness?.ageMs === 'number' && status.liveness.ageMs < thresholdMs) {
+    return null;
+  }
+  const lastActivityMs = activityTimes.length ? Math.max(...activityTimes) : null;
+  if (lastActivityMs !== null && now - lastActivityMs < thresholdMs) {
+    return null;
+  }
+  const pendingToolReason = sessionId ? getJsonlPendingToolReason(sessionId, sessionAgent) : null;
+  if (pendingToolReason) {
+    process.stderr.write(
+      `[watcher] ${status.label || 'session'} stale telemetry but pending tool handoff detected: ${pendingToolReason}\n`
+    );
+    return null;
+  }
+  const idleMinutes = lastActivityMs === null
+    ? Math.ceil(thresholdMs / 60000)
+    : Math.max(1, Math.floor((now - lastActivityMs) / 60000));
+  return (
+    `agent session stalled: no session/jsonl activity for ~${idleMinutes}min ` +
+    `while delivery watcher remained alive; likely app-server turn retired or stopped producing events`
+  );
+}
 /**
  * Read the last assistant entry's stop_reason from the session JSONL.
  * Returns the stop_reason string (e.g. 'end_turn', 'tool_use') or null if unavailable.
@@ -728,6 +860,7 @@ function markLabelError(label, errorSummary) {
     updateExistingLabel(label, (entry) => {
       if (entry.status === 'done') return false;
       entry.status = 'error';
+      entry.error = errorSummary || 'failed without result';
       entry.summary = errorSummary || 'failed without result';
     });
   } catch (e) {
@@ -735,6 +868,8 @@ function markLabelError(label, errorSummary) {
   }
 }
+let exitZeroOnTerminal = false;
 /**
  * Format and output the delivery message, then exit 0.
  * Also marks the label as done in labels.json before exiting.
@@ -768,7 +903,7 @@ function deliverResult(label, lastReply, fallbackSummary, completionPayload = nu
           `**Error:** ${stderr || 'non-zero exit'}\n\n` +
           `Job marked as \`error\`. The agent may have reported done without completing the actual work.\n`
         );
-        process.exit(1);
+        process.exit(exitZeroOnTerminal ? 0 : 1);
       }
     }
   } catch (loadErr) {
@@ -790,10 +925,32 @@ function deliverResult(label, lastReply, fallbackSummary, completionPayload = nu
       ? completion.deliveryText.slice(0, maxLen) + '\n\n..[truncated]'
       : completion.deliveryText;
     process.stdout.write(`🌶️ *dispatch* [${label}] completed:\n\n${reply}\n`);
-  } else {
-    process.stderr.write(`[watcher] [${label}] completion delivery suppressed (no meaningful reply or summary)\n`);
+    process.exit(0);
   }
-  process.exit(0);
+  const failureSummary = 'completed without a clean user-facing completion';
+  process.stderr.write(`[watcher] [${label}] completion delivery suppressed (no meaningful reply or summary)\n`);
+  markLabelError(label, failureSummary);
+  process.stdout.write(
+    `⚠️ dispatch [${label}] completed, but no clean user-facing completion was captured. ` +
+    `Internal diagnostics were suppressed; check scheduler run logs for details.\n`
+  );
+  process.exit(exitZeroOnTerminal ? 0 : 1);
+}
+function emitInterruptedOutcome(label, summary, result = null) {
+  process.stderr.write(`[watcher] [${label}] session auto-resolved as interrupted -- work may be incomplete\n`);
+  markLabelError(label, summary || 'interrupted: session went idle without calling done');
+  process.stdout.write(
+    `⚠️ dispatch [${label}] session went idle before completing -- work may be incomplete` +
+    `${formatDiagnosticSnippet(result?.diagnosticReply || result?.lastReply || null)}\n`
+  );
+  process.exit(exitZeroOnTerminal ? 0 : 1);
+}
+function emitTimeoutOutcome(label, message, result = null) {
+  process.stdout.write(`${message}${formatDiagnosticSnippet(result?.diagnosticReply || result?.lastReply || null)}\n`);
+  process.exit(exitZeroOnTerminal ? 0 : 1);
 }
 // -- Watcher heartbeat interval ref --------------------------------------
@@ -828,6 +985,8 @@ const flags = parseFlags(process.argv.slice(2));
 const label       = flags.label;
 const timeoutS    = parseInt(flags.timeout || '600', 10);
 const pollS       = parseInt(flags['poll-interval'] || '20', 10);
+const once        = flags.once === true || flags.once === 'true';
+exitZeroOnTerminal = once;
 // How long a session must be idle before we proactively check result
 const IDLE_RESULT_CHECK_MS = 60000;
@@ -837,6 +996,144 @@ if (!label) {
   process.exit(2);
 }
+function touchWatcherPing(label) {
+  updateExistingLabel(label, (entry) => {
+    if (entry.status !== 'running') return false;
+    entry.lastPing = new Date().toISOString();
+  });
+}
+function markWatcherPending(label, reason = 'target still running') {
+  process.stderr.write(`[watcher] WATCHER_PENDING label=${label} reason=${reason}\n`);
+  process.exit(0);
+}
+function clearWatcherRetryAfter(label) {
+  updateExistingLabel(label, (entry) => {
+    if (!entry.watcherRetryAfter) return false;
+    delete entry.watcherRetryAfter;
+  });
+}
+function handleOnce529(label, errorMsg) {
+  const labels = loadLabels();
+  const entry = labels[label] || {};
+  const retryCount = getRetryCount(label);
+  if (retryCount >= MAX_529_RETRIES) {
+    markLabelError(label, `max_retries_exceeded (${retryCount}x 529): ${errorMsg}`);
+    process.stdout.write(
+      `🌶️ *dispatch* [${label}] failed after ${MAX_529_RETRIES} retries (529 overload)\n` +
+      `Error: ${errorMsg}\n`
+    );
+    process.exit(0);
+  }
+  const retryAfterMs = parseTimestampMs(entry.watcherRetryAfter);
+  if (!retryAfterMs) {
+    const retryResult = attempt529Retry(label, retryCount, errorMsg);
+    if (!retryResult.retry) return handleOnce529(label, errorMsg);
+    updateExistingLabel(label, (current) => {
+      current.watcherRetryAfter = new Date(Date.now() + retryResult.delayMs).toISOString();
+    });
+    markWatcherPending(label, `529 retry scheduled for future tick (${retryResult.delayMs / 1000}s)`);
+  }
+  if (Date.now() < retryAfterMs) {
+    markWatcherPending(label, '529 retry backoff active');
+  }
+  if (respawnSession(label)) {
+    clearWatcherRetryAfter(label);
+    markWatcherPending(label, '529 retry dispatched');
+  }
+  markLabelError(label, `529 retry failed -- could not respawn session: ${errorMsg}`);
+  process.stdout.write(
+    `🌶️ *dispatch* [${label}] 529 retry failed -- could not respawn session\n` +
+    `Error: ${errorMsg}\n`
+  );
+  process.exit(0);
+}
+function runOnceAndExit() {
+  try {
+    touchWatcherPing(label);
+  } catch {
+    // Best-effort -- a quick-poll tick must not fail because heartbeat metadata raced.
+  }
+  const status = dispatch('status', ['--label', label]);
+  if (!status?.ok) {
+    markWatcherPending(label, 'status unavailable');
+  }
+  if (status.status === 'error') {
+    const errorMsg = status.error || status.summary || '';
+    if (is529Error(errorMsg)) {
+      handleOnce529(label, errorMsg);
+    }
+  }
+  if (status.status !== 'running') {
+    const terminalResult = dispatch('result', ['--label', label]);
+    const terminalCompletion = terminalResult?.completion || status?.completion || null;
+    if (status.status === 'done') {
+      const currentRetryCount = getRetryCount(label);
+      if (currentRetryCount > 0) setRetryCount(label, 0);
+      const gwRetryCount = getGwRestartRetryCount(label);
+      if (gwRetryCount > 0) setGwRestartRetryCount(label, 0);
+      deliverResult(label, terminalResult?.lastReply, status.summary, terminalCompletion);
+    }
+    if (status.status === 'interrupted') {
+      emitInterruptedOutcome(label, status.summary, terminalResult);
+    }
+    const summary = status.error || status.summary || `terminal failure (${status.status || 'unknown'})`;
+    markLabelError(label, summary);
+    process.stdout.write(`🌶️ *dispatch* [${label}] failed\nSummary: ${summary}\n`);
+    process.exit(0);
+  }
+  if (status.sessionKey) {
+    const entry = getSessionStoreEntry(status.sessionKey);
+    const sessionId = entry?.sessionId || null;
+    const sessionAgent = status.sessionKey.split(':')[1] || 'main';
+    const terminalJsonlReply = sessionId ? getSessionTerminalReply(sessionId, sessionAgent) : null;
+    if (sessionId && terminalJsonlReply && isSessionCleanlyFinished(sessionId, sessionAgent)) {
+      const result = dispatch('result', ['--label', label]);
+      deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
+    }
+  }
+  const ageMs = status.liveness?.ageMs;
+  if (ageMs != null && ageMs >= IDLE_RESULT_CHECK_MS) {
+    const result = dispatch('result', ['--label', label]);
+    if (result?.lastReply || hasCompletionSignal(result?.completion)) {
+      deliverResult(label, result?.lastReply || null, null, result?.completion || null);
+    }
+    const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
+    if (stallReason) {
+      process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
+      markLabelError(label, stallReason);
+      process.stdout.write(
+        `❌ *dispatch* [${label}] failed\n` +
+        `Summary: ${stallReason}\n`
+      );
+      process.exit(0);
+    }
+  }
+  markWatcherPending(label);
+}
+if (once) {
+  runOnceAndExit();
+}
 // -- Start heartbeat -----------------------------------------------------
 // Write lastPing to labels.json every PING_INTERVAL_MS while the session is
 // still running. The watchdog guard in index.mjs reads lastPing to know this
@@ -870,17 +1167,47 @@ let lastKnownReply = null;
 let lastKnownCompletion = null;
 // -- SIGTERM handler (scheduler kills watcher with SIGTERM before SIGKILL) --
-// Ensures labels.json is updated and a delivery attempt is made even when killed.
+// Hand off to a fresh watcher instead of converting the kill into a fake success.
 process.on('SIGTERM', () => {
-  process.stderr.write(`[watcher] SIGTERM received for ${label} -- marking as interrupted\n`);
-  // Try to fetch the latest result before dying
+  process.stderr.write(`[watcher] SIGTERM received for ${label} -- attempting watcher handoff\n`);
+  let latestStatus = null;
+  try {
+    latestStatus = dispatch('status', ['--label', label]);
+  } catch {}
   try {
     const result = dispatch('result', ['--label', label]);
     if (result?.lastReply) lastKnownReply = result.lastReply;
     if (result?.completion) lastKnownCompletion = result.completion;
   } catch {}
-  // deliverResult calls process.exit(0) internally
-  deliverResult(label, lastKnownReply, 'interrupted by watcher timeout', lastKnownCompletion);
+  if (latestStatus?.status === 'done') {
+    deliverResult(label, lastKnownReply, latestStatus.summary || null, lastKnownCompletion || latestStatus?.completion || null);
+  }
+  if (latestStatus?.status === 'interrupted') {
+    markLabelError(label, latestStatus.summary || 'interrupted: session went idle without calling done');
+    process.exit(1);
+  }
+  if (latestStatus?.status && latestStatus.status !== 'running') {
+    const summary = latestStatus.error || latestStatus.summary || `terminal failure (${latestStatus.status})`;
+    markLabelError(label, summary);
+    process.stdout.write(`🌶️ *dispatch* [${label}] failed\nSummary: ${summary}\n`);
+    process.exit(1);
+  }
+  const handoff = dispatch('watcher-handoff', ['--label', label, '--reason', 'sigterm']);
+  if (handoff?.ok && (handoff.scheduled || handoff.reason === 'label already terminal' || handoff.reason === 'delivery disabled for this label')) {
+    process.stderr.write(`[watcher] SIGTERM handoff ${handoff.scheduled ? 'scheduled' : 'skipped'} for ${label}\n`);
+    process.exit(0);
+  }
+  const failureSummary = 'interrupted by watcher timeout (handoff failed)';
+  markLabelError(label, failureSummary);
+  process.stdout.write(`⚠️ dispatch [${label}] watcher interrupted and handoff failed\nSummary: ${failureSummary}\n`);
+  process.exit(1);
 });
 // -- Rolling deadline vars ------------------------------------
@@ -1024,11 +1351,21 @@ while (Date.now() < deadline) {
   // -- Path 1: status auto-resolved to done ------------------
   if (status.status !== 'running') {
+    const terminalResult = dispatch('result', ['--label', label]);
+    const terminalCompletion = terminalResult?.completion || status?.completion || null;
+    const hasTerminalCompletionEvidence = Boolean(
+      terminalResult?.lastReply
+      || terminalResult?.completion?.deliveryText
+      || terminalResult?.completion?.summary
+      || status?.completion?.deliveryText
+      || status?.completion?.summary
+    );
     // -- Spawn failure detection -----------------------------------------
     // If the session was auto-resolved to 'done' (or 'spawn-warning') but was
-    // never seen in the gateway, it never ran -- this is a spawn failure.
-    // Causes: auth timeout, quota exhaustion, gateway error at spawn time.
-    if (!sessionEverFound && (status.status === 'done' || status.status === 'spawn-warning' || status.status === 'error')) {
+    // never seen in the gateway, it never ran -- unless a terminal completion
+    // payload/reply proves the work already finished before this watcher saw it.
+    if (!sessionEverFound && (status.status === 'spawn-warning' || status.status === 'error' || (status.status === 'done' && !hasTerminalCompletionEvidence))) {
       const spawnErrMsg =
         `[dispatch] SPAWN FAILURE: session ${status.sessionKey || '(unknown)'} never appeared ` +
         `in gateway -- spawn likely failed (auth timeout, quota, or gateway error). Label: ${label}`;
@@ -1055,7 +1392,7 @@ while (Date.now() < deadline) {
     // If the session DID produce a lastReply before being killed, deliver it normally.
     if (sessionEverFound && isGatewayRestartKill(status.summary)) {
       const gwCheckResult = dispatch('result', ['--label', label]);
-      if (!gwCheckResult?.lastReply && !gwCheckResult?.completion?.deliveryText) {
+      if (!gwCheckResult?.lastReply && !hasCompletionSignal(gwCheckResult?.completion)) {
         // No result captured -- session was killed before completing
         const retryCount = getGwRestartRetryCount(label);
         if (retryCount >= MAX_GW_RESTART_RETRIES) {
@@ -1113,12 +1450,8 @@ while (Date.now() < deadline) {
     //
     // NOTE: Always resolve as 'interrupted', never 'done'. Only agent-side cmdDone may set status=done.
     if (status.status === 'interrupted') {
-      process.stderr.write(`[watcher] [${label}] session auto-resolved as interrupted -- work may be incomplete\n`);
-      process.stdout.write(
-        `⚠️ dispatch [${label}] session went idle before completing -- work may be incomplete\n`
-      );
-      markLabelError(label, status.summary || 'interrupted: session went idle without calling done');
-      process.exit(1);
+      const interruptedResult = dispatch('result', ['--label', label]);
+      emitInterruptedOutcome(label, status.summary, interruptedResult);
     }
     // Reset 529 retryCount on successful completion
@@ -1129,8 +1462,7 @@ while (Date.now() < deadline) {
         process.stderr.write(`[watcher] [${label}] completed after ${currentRetryCount} retry(ies), reset retryCount\n`);
       }
     }
-    const result = dispatch('result', ['--label', label]);
-    deliverResult(label, result?.lastReply, status.summary, result?.completion || status?.completion || null);
+    deliverResult(label, terminalResult?.lastReply, status.summary, terminalCompletion);
   }
   // -- Path 2a: stop_reason early delivery (clean end_turn) --
@@ -1141,10 +1473,11 @@ while (Date.now() < deadline) {
     const _e2a = getSessionStoreEntry(status.sessionKey);
     const _sid2a = _e2a?.sessionId || null;
     const _adir2a = (status.sessionKey.split(':')[1]) || 'main';
-    if (_sid2a && isSessionCleanlyFinished(_sid2a, _adir2a)) {
+    const terminalJsonlReply = _sid2a ? getSessionTerminalReply(_sid2a, _adir2a) : null;
+    if (_sid2a && terminalJsonlReply && isSessionCleanlyFinished(_sid2a, _adir2a)) {
       process.stderr.write(`[watcher] stop_reason=end_turn detected -- delivering early\n`);
       const result = dispatch('result', ['--label', label]);
-      deliverResult(label, result?.lastReply, 'completed (stop_reason=end_turn)', result?.completion || null);
+      deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
       // deliverResult exits
     }
   }
@@ -1158,9 +1491,20 @@ while (Date.now() < deadline) {
   const ageMs = status.liveness?.ageMs;
   if (ageMs != null && ageMs >= IDLE_RESULT_CHECK_MS) {
     const result = dispatch('result', ['--label', label]);
-    if (result?.lastReply || result?.completion?.deliveryText) {
+    if (result?.lastReply || hasCompletionSignal(result?.completion)) {
       deliverResult(label, result?.lastReply || null, null, result?.completion || null);
     }
+    const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
+    if (stallReason) {
+      process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
+      markLabelError(label, stallReason);
+      process.stdout.write(
+        `❌ *dispatch* [${label}] failed\n` +
+        `Summary: ${stallReason}\n`
+      );
+      process.exit(1);
+    }
   }
@@ -1183,11 +1527,7 @@ if (finalStatus?.status === 'done') {
 // If status is interrupted (auto-resolved as incomplete), exit non-zero
 if (finalStatus?.status === 'interrupted') {
   process.stderr.write(`[watcher] [${label}] final status=interrupted -- session idle without completion\n`);
-  process.stdout.write(
-    `⚠️ dispatch [${label}] session went idle before completing -- work may be incomplete\n`
-  );
-  markLabelError(label, finalStatus?.summary || 'interrupted: session went idle without calling done');
-  process.exit(1);
+  emitInterruptedOutcome(label, finalStatus?.summary, finalResult);
 }
 // -- Token-based activity check before steering ----------------------------
@@ -1237,7 +1577,7 @@ if (sessionInternalId) {
 // If the session already completed (gateway pruned it -> null tokens), exit cleanly.
 if (statusAtDeadline?.status === 'done' || baselineTokens === null) {
   const r = dispatch('result', ['--label', label]);
-  if (r?.lastReply || r?.completion?.deliveryText) {
+  if (r?.lastReply || hasCompletionSignal(r?.completion)) {
     // deliverResult calls process.exit(0) internally
     deliverResult(label, r?.lastReply || null, statusAtDeadline?.summary || null, r?.completion || null);
   }
@@ -1255,8 +1595,7 @@ if (statusAtDeadline?.status === 'done' || baselineTokens === null) {
       // Session truly not found -- telemetry unavailable, exit
       process.stderr.write(`[watcher] token telemetry unavailable for ${label}; session not in store\n`);
       markLabelError(label, `timed out after ${timeoutS}s -- token telemetry unavailable`);
-      process.stdout.write(`⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry unavailable; no steer/kill attempted\n`);
-      process.exit(1);
+      emitTimeoutOutcome(label, `⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry unavailable; no steer/kill attempted`, r);
     }
     // Session IS in store but no tokens -- mid-tool-call, fall through to activity window
     // Use updatedAt as activity signal instead of tokens
@@ -1277,7 +1616,7 @@ while (Date.now() - flatSince < FLAT_WINDOW_MS) {
     deliverResult(label, r?.lastReply || null, st.summary, r?.completion || st?.completion || null);
   }
   const r2 = dispatch('result', ['--label', label]);
-  if (r2?.lastReply || r2?.completion?.deliveryText) {
+  if (r2?.lastReply || hasCompletionSignal(r2?.completion)) {
     // deliverResult calls process.exit(0) internally
     deliverResult(label, r2?.lastReply || null, null, r2?.completion || null);
   }
@@ -1290,8 +1629,8 @@ while (Date.now() - flatSince < FLAT_WINDOW_MS) {
     if (!entry) {
       process.stderr.write(`[watcher] token telemetry lost for ${label}; session gone from store\n`);
       markLabelError(label, `timed out after ${timeoutS}s -- token telemetry lost`);
-      process.stdout.write(`⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry lost; no steer/kill attempted\n`);
-      process.exit(1);
+      const tokenLostResult = dispatch('result', ['--label', label]);
+      emitTimeoutOutcome(label, `⏱ dispatch [${label}] timed out after ${timeoutS}s -- token telemetry lost; no steer/kill attempted`, tokenLostResult);
     }
     // Still in store -- check if updatedAt advanced (tool call still running)
     // Normalize: updatedAt may be seconds or milliseconds depending on agent framework version
@@ -1371,7 +1710,7 @@ if (sessionInternalId) {
         deliverResult(label, rExt?.lastReply || null, stExt.summary, rExt?.completion || stExt?.completion || null);
       }
       const rExt2 = dispatch('result', ['--label', label]);
-      if (rExt2?.lastReply || rExt2?.completion?.deliveryText) {
+      if (rExt2?.lastReply || hasCompletionSignal(rExt2?.completion)) {
         // deliverResult calls process.exit(0) internally
         deliverResult(label, rExt2?.lastReply || null, null, rExt2?.completion || null);
       }
@@ -1428,7 +1767,7 @@ for (const round of steerRounds) {
     deliverResult(label, r3?.lastReply || null, st2.summary, r3?.completion || st2?.completion || null);
   }
   const r3 = dispatch('result', ['--label', label]);
-  if (r3?.lastReply || r3?.completion?.deliveryText) {
+  if (r3?.lastReply || hasCompletionSignal(r3?.completion)) {
     // deliverResult calls process.exit(0) internally
     deliverResult(label, r3?.lastReply || null, null, r3?.completion || null);
   }
@@ -1443,17 +1782,16 @@ for (const round of steerRounds) {
       if (st3?.status === 'done') {
         // Check if a result was captured before marking as error
         const r4 = dispatch('result', ['--label', label]);
-        if (r4?.lastReply || r4?.completion?.deliveryText) {
+        if (r4?.lastReply || hasCompletionSignal(r4?.completion)) {
           deliverResult(label, r4?.lastReply || null, st3.summary, r4?.completion || st3?.completion || null); // deliverResult calls process.exit(0)
         }
         markLabelError(label, 'timed out -- killed after steer attempts (no result captured)');
-        process.stdout.write(`⏱ dispatch [${label}] killed after steer attempts -- no result captured\n`);
-        process.exit(1);
+        emitTimeoutOutcome(label, `⏱ dispatch [${label}] killed after steer attempts -- no result captured`, r4);
       }
     }
   }
 }
 markLabelError(label, `timed out after ${timeoutS}s -- killed after steer attempts`);
-process.stdout.write(`⏱ dispatch [${label}] timed out after ${timeoutS}s -- session killed after steer attempts\n`);
-process.exit(1);
+const timeoutResult = dispatch('result', ['--label', label]);
+emitTimeoutOutcome(label, `⏱ dispatch [${label}] timed out after ${timeoutS}s -- session killed after steer attempts`, timeoutResult);