npm - openclaw-scheduler - Versions diffs - 0.2.5 → 0.2.6 - Mend

openclaw-scheduler 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dispatch/completion.mjs +297 -20
package/dispatch/index.mjs +58 -40
package/dispatch/watcher.mjs +273 -6
package/dispatcher-strategies.js +82 -10
package/dispatcher.js +6 -1
package/gateway.js +39 -0
package/package.json +1 -1

package/dispatch/completion.mjs CHANGED Viewed

@@ -34,6 +34,13 @@ const TEST_FRAGMENT_RE = /\b(?:test|tests|spec|coverage|lint|typecheck|tsc|eslin
 const TEST_APPLICABILITY_RE = /\b(?:test|tests|pytest|jest|vitest|mocha|cypress|playwright|npm\s+test|pnpm\s+test|yarn\s+test|cargo\s+test|go\s+test|rspec)\b/i;
 const TEST_NEGATION_RE = /\b(?:do\s+not|don't|dont|never|skip|without|no)\s+(?:run\s+)?(?:the\s+)?tests?\b/i;
 const PUSH_FORBIDDEN_RE = /\b(?:do\s+not|don't|dont|never|must\s+not|should\s+not)\s+(?:git\s+push|push)\b|\bno\s+push\b|\bwithout\s+pushing\b/i;
+// Inline "Technically:" / "Technical details:" marker; used to split a text into a lead and a technical tail.
+const EXPLICIT_TECHNICAL_MARKER_RE = /\b(?:Technically|Technical details)\s*:\s*/i;
+// Section labels matched at the start of the text or directly after a newline (no `m` flag, hence the explicit `\n`).
+const HUMAN_SUMMARY_SECTION_RE = /(?:^|\n)\s*(?:Human-readable summary|Human summary)\s*:\s*/i;
+const TECHNICAL_DETAILS_SECTION_RE = /(?:^|\n)\s*(?:Technical details?|Details(?:_technical)?)\s*:\s*/i;
+// The same labels anchored to the start of the string, so they can be stripped from a single line.
+const HUMAN_SUMMARY_LABEL_RE = /^(?:human-readable summary|human summary)\s*:\s*/i;
+const TECHNICAL_DETAILS_LABEL_RE = /^(?:technical details?|details(?:_technical)?)\s*:\s*/i;
+// A whole line that is a report section heading: optional "#" prefix, a known heading phrase, optional trailing colon.
+const FINAL_REPORT_HEADING_RE = /^(?:#{1,6}\s*)?(?:root cause|files? changed|changes|validation|tests?(?: run| passed)?|sacrificial(?: delivery)?(?: result)?|deployment(?:\/live-runtime)?(?: step)?|live-runtime(?: step)?|result|results|summary|highlights?|notes?|follow[- ]ups?|next steps?|blockers?|implementation|what changed|verification)\s*:?$/i;
+// Phrases that may appear anywhere in the text and hint at a structured final report.
+const FINAL_REPORT_CUE_RE = /\b(?:root cause|files? changed|tests? run|validation|sacrificial(?: delivery)?(?: result)?|deployment(?:\/live-runtime)?(?: step)?|live-runtime(?: step)?|final report|human-readable report|files changed|tests passed)\b/i;
 export function normalizeCompletionText(value) {
   if (typeof value !== 'string') return null;
@@ -71,6 +78,56 @@ function cleanMarkdown(text) {
     .replace(/^>\s?/gm, '');
 }
+/**
+ * Normalize a report's whitespace without collapsing its line structure.
+ *
+ * The text is passed through normalizeCompletionText and stripAnsi, CR/CRLF
+ * become LF, trailing spaces/tabs are removed from every line, runs of three
+ * or more newlines shrink to one blank line, and the result is trimmed.
+ *
+ * @param {*} text - Candidate report text.
+ * @returns {string|null} Normalized text, or null when normalizeCompletionText yields nothing.
+ */
+function normalizeReportLineEndings(text) {
+  const base = normalizeCompletionText(text);
+  if (!base) return null;
+  const unixNewlines = stripAnsi(base).replace(/\r\n?/g, '\n');
+  const rightTrimmed = unixNewlines
+    .split('\n')
+    .map(line => line.replace(/[ \t]+$/g, ''))
+    .join('\n');
+  return rightTrimmed.replace(/\n{3,}/g, '\n\n').trim();
+}
+/**
+ * Heuristically decide whether a text is already a multi-section,
+ * human-readable final report that should be delivered unchanged.
+ *
+ * @param {*} text - Candidate completion text.
+ * @returns {boolean} True when the text has the shape of a final report.
+ */
+function isLikelyHumanFinalReport(text) {
+  const report = normalizeReportLineEndings(text);
+  if (!report) return false;
+  // Any of these classifiers rejects the text outright; they run in this order and stop at the first hit.
+  const disqualifiers = [
+    isGenericOrTrivial,
+    isInternalTransportNoiseText,
+    looksLikeRawPayloadText,
+    looksLikeGunbrokerReport,
+  ];
+  if (disqualifiers.some(check => check(report))) return false;
+
+  // Non-empty lines, ignoring code-fence delimiters.
+  const contentLines = [];
+  for (const rawLine of report.split('\n')) {
+    const line = rawLine.trim();
+    if (line && !/^```/.test(line)) contentLines.push(line);
+  }
+  if (contentLines.length < 3) return false;
+
+  let headingCount = 0;
+  for (const line of contentLines) {
+    const plain = cleanMarkdown(line).replace(/\s+/g, ' ').trim();
+    if (FINAL_REPORT_HEADING_RE.test(plain)) headingCount += 1;
+  }
+  const itemCount = contentLines.filter(isItemLine).length;
+  const hasCue = FINAL_REPORT_CUE_RE.test(report);
+  const hasSectionLabel = /^#{1,6}\s+\S|^[A-Za-z][A-Za-z0-9 /_-]{2,60}:$/m.test(report);
+
+  // Both accepted shapes require a report cue somewhere in the text.
+  if (!hasCue) return false;
+  // This is the key path for real completion reports from agents: multiple
+  // human-readable sections plus bullets. Those reports are already the final
+  // answer and must not be collapsed into "Files changed: Validation: ...".
+  if (headingCount >= 2 && (itemCount >= 1 || contentLines.length >= 5)) return true;
+  // Allow slightly shorter reports with an explicit root cause / validation shape.
+  return headingCount >= 1 && itemCount >= 2 && hasSectionLabel;
+}
+/**
+ * Return the normalized report text when it qualifies for pass-through delivery.
+ *
+ * @param {*} text - Candidate completion text.
+ * @returns {string|null} The normalized report, or null when it is not a final report.
+ */
+function getPassThroughHumanFinalReport(text) {
+  const report = normalizeReportLineEndings(text);
+  if (!report) return null;
+  if (!isLikelyHumanFinalReport(report)) return null;
+  return report;
+}
 function isGenericOrTrivial(text) {
   const normalized = normalizeCompletionText(text)?.toLowerCase().replace(/\s+/g, ' ').trim();
   if (!normalized) return true;
@@ -205,6 +262,80 @@ function upperFirst(text) {
   return text.charAt(0).toUpperCase() + text.slice(1);
 }
+/**
+ * Split a text at the first explicit "Technically:" / "Technical details:" marker.
+ *
+ * @param {*} text - Text that may contain an inline technical marker.
+ * @returns {{lead: string, technicalTail: string}|null} The text before and after the
+ *   marker, or null when there is no marker or either side is empty.
+ */
+function extractExplicitTechnicalTail(text) {
+  const source = normalizeCompletionText(text);
+  if (!source) return null;
+  const marker = EXPLICIT_TECHNICAL_MARKER_RE.exec(source);
+  // A marker at index 0 leaves nothing to use as the lead.
+  if (!marker || typeof marker.index !== 'number' || marker.index <= 0) return null;
+  const markerEnd = marker.index + marker[0].length;
+  const lead = normalizeCompletionText(source.slice(0, marker.index));
+  const technicalTail = normalizeCompletionText(source.slice(markerEnd));
+  return lead && technicalTail ? { lead, technicalTail } : null;
+}
+/**
+ * Extract the labelled "Human summary:" and "Technical details:" sections from a text.
+ *
+ * Each section starts after its label and ends where the other section's
+ * label begins (when that label comes later) or at the end of the text.
+ * Bounding both sections keeps a leading technical section from swallowing a
+ * human summary that follows it.
+ *
+ * @param {*} text - Text that may contain labelled sections.
+ * @returns {{summary: string|null, technical: string|null}|null} The sections found,
+ *   or null when neither label is present or both sections are empty.
+ */
+function extractStructuredSummarySections(text) {
+  const normalized = normalizeCompletionText(text);
+  if (!normalized) return null;
+  const cleaned = cleanMarkdown(normalized).replace(/\r\n?/g, '\n').trim();
+  if (!cleaned) return null;
+  const humanMatch = HUMAN_SUMMARY_SECTION_RE.exec(cleaned);
+  const technicalMatch = TECHNICAL_DETAILS_SECTION_RE.exec(cleaned);
+  if (!humanMatch && !technicalMatch) return null;
+
+  // Body of the section opened by `own`, cut off at `other` when `other` starts later.
+  const sliceSection = (own, other) => {
+    if (!own) return null;
+    const start = own.index + own[0].length;
+    const end = other && other.index > own.index ? other.index : cleaned.length;
+    return normalizeCompletionText(cleaned.slice(start, end));
+  };
+
+  const summary = sliceSection(humanMatch, technicalMatch);
+  const technical = sliceSection(technicalMatch, humanMatch);
+  if (!summary && !technical) return null;
+  return { summary, technical };
+}
+/**
+ * Remove a "Human summary:" style label and return only the summary text.
+ *
+ * @param {*} text - Text that may start with, or contain, a human-summary label.
+ * @returns {string|null} The summary without its label, or null when nothing remains.
+ */
+function stripHumanSummaryLabel(text) {
+  const input = normalizeCompletionText(text);
+  if (!input) return null;
+  const labelledSummary = extractStructuredSummarySections(input)?.summary;
+  // Prefer the extracted section; otherwise strip a leading label from the text.
+  const candidate = labelledSummary
+    ? labelledSummary
+    : input.replace(HUMAN_SUMMARY_LABEL_RE, '');
+  return normalizeCompletionText(candidate);
+}
+/**
+ * Flatten technical-detail text into one compact line.
+ *
+ * Uses the labelled technical section when one exists, otherwise the whole
+ * text. Each line loses any leading summary/technical label and bullet
+ * marker, and the remaining fragments are joined with single spaces.
+ *
+ * @param {*} text - Raw technical detail text.
+ * @returns {string|null} The compact line, or null when nothing meaningful remains.
+ */
+function normalizeTechnicalDetailLine(text) {
+  const input = normalizeCompletionText(text);
+  if (!input) return null;
+  const technicalSection = extractStructuredSummarySections(input)?.technical;
+  const source = normalizeCompletionText(technicalSection || input);
+  if (!source) return null;
+  const fragments = [];
+  for (const line of prepareLines(source)) {
+    const stripped = line
+      .replace(HUMAN_SUMMARY_LABEL_RE, '')
+      .replace(TECHNICAL_DETAILS_LABEL_RE, '')
+      .replace(/^[-*•]\s+/, '')
+      .trim();
+    if (stripped) fragments.push(stripped);
+  }
+  return normalizeCompletionText(fragments.join(' ')) || null;
+}
 function looksLikeRawPayloadText(text) {
   const normalized = normalizeCompletionText(text);
   if (!normalized) return false;
@@ -409,6 +540,107 @@ function cleanTechnicalFragment(text) {
   return cleaned || null;
 }
+/**
+ * Rewrite the lead of a mixed technical message into plainer wording.
+ *
+ * Saved-path mentions become "your saved progress", camelCase tokens go
+ * through humanizeCamelToken, snake_case tokens have underscores replaced by
+ * spaces, and replaceTechnicalPhrases is applied before spacing is tidied.
+ *
+ * @param {*} text - Lead text that precedes the technical marker.
+ * @returns {string|null} The cleaned lead, or null when it is empty.
+ */
+function cleanLeadForHumanSummary(text) {
+  const lead = normalizeCompletionText(text);
+  if (!lead) return null;
+  const detokenized = cleanMarkdown(lead)
+    .replace(/\bsaved\s+[a-z0-9_]+(?:\/[a-z0-9_]+)+\b/gi, 'your saved progress')
+    .replace(/\b([a-z][A-Za-z0-9_]*[A-Z][A-Za-z0-9_]*)\b/g, (_, token) => humanizeCamelToken(token))
+    .replace(/\b([a-z0-9]+_[a-z0-9_]+)\b/g, token => token.replace(/_/g, ' '));
+  const tidied = replaceTechnicalPhrases(detokenized)
+    .replace(/\brepeating the completed one\b/gi, 'repeating the one you already finished')
+    .replace(/\s+,/g, ',')
+    .replace(/\s+\./g, '.')
+    .replace(/\s+/g, ' ')
+    .trim();
+  return tidied || null;
+}
+/**
+ * Turn a technical subject phrase into a noun phrase for a human sentence.
+ *
+ * @param {*} subject - Subject captured from the technical tail.
+ * @returns {string} A phrase starting with a determiner; 'the missing data' when the subject is empty.
+ */
+function formatMixedTechnicalSubject(subject) {
+  const fallback = 'the missing data';
+  const base = normalizeCompletionText(subject);
+  if (!base) return fallback;
+  const detokenized = cleanMarkdown(base)
+    .replace(/\bhealth auto export\b/gi, 'source export')
+    .replace(/\bworkouts?\.json\b/gi, 'the export')
+    .replace(/\b([a-z][A-Za-z0-9_]*[A-Z][A-Za-z0-9_]*)\b/g, (_, token) => humanizeCamelToken(token))
+    .replace(/\b([a-z0-9]+_[a-z0-9_]+)\b/g, token => token.replace(/_/g, ' '));
+  const phrase = replaceTechnicalPhrases(detokenized)
+    // Drop a leading "the" unless it introduces "the missing ...".
+    .replace(/^the\s+(?!missing\b)/i, '')
+    .replace(/\s+/g, ' ')
+    .trim();
+  if (!phrase) return fallback;
+  // Phrases that already open with a determiner (this includes "the missing ...")
+  // are returned as-is; everything else, "missing ..." included, gets "the".
+  return /^(?:the|your|this|that)\b/i.test(phrase) ? phrase : `the ${phrase}`;
+}
+/**
+ * Build a human sentence for a technical tail that reports a source-side or
+ * empty-source finding.
+ *
+ * @param {*} technicalTail - Text that followed the explicit technical marker.
+ * @returns {string|null} The sentence, or null when the tail mentions neither condition.
+ */
+function buildSourceSideHumanSentence(technicalTail) {
+  const tail = cleanMarkdown(normalizeCompletionText(technicalTail) || '')
+    .replace(/\s+/g, ' ')
+    .trim();
+  if (!tail) return null;
+  const hasSourceSide = /\bsource[- ]side\b/i.test(tail);
+  const hasEmptySource = /\b(?:contains zero|count 0|empty|zero [a-z ]+ objects?|no [a-z ]+ to import)\b/i.test(tail);
+  if (!(hasSourceSide || hasEmptySource)) return null;
+
+  // Tried from most to least specific; the first pattern that matches supplies the subject.
+  const subjectPatterns = [
+    /\b(?:confirmed|found|verified|checked)\s+(the\s+missing\s+.+?)\s+is\s+source[- ]side\b/i,
+    /\b(the\s+missing\s+.+?)\s+is\s+source[- ]side\b/i,
+    /\b(?:confirmed|found|verified|checked)\s+(.+?)\s+is\s+source[- ]side\b/i,
+    /\b(.+?)\s+is\s+source[- ]side\b/i,
+  ];
+  let rawSubject = 'missing data';
+  for (const pattern of subjectPatterns) {
+    const found = tail.match(pattern);
+    if (found) {
+      rawSubject = found[1] || rawSubject;
+      break;
+    }
+  }
+  const subject = formatMixedTechnicalSubject(rawSubject);
+
+  return hasEmptySource
+    ? `I also checked ${subject}, and the source export is empty right now, so there isn't anything new to import yet.`
+    : `I also checked ${subject}, and this turned out to be a source-data issue rather than a new scheduler bug.`;
+}
+/**
+ * Pick a closing "what happens next" sentence based on the topic of a summary.
+ *
+ * @param {*} text - Summary sentences assembled so far.
+ * @returns {string|null} The closing sentence, or null when the text is empty or
+ *   already contains "next", "should" or "will".
+ */
+function buildMixedLeadExpectation(text) {
+  const lowered = cleanMarkdown(normalizeCompletionText(text) || '').toLowerCase();
+  if (!lowered) return null;
+  if (/\b(?:next|should|will)\b/.test(lowered)) return null;
+  // The first topic that matches wins, so the order of these rows matters.
+  const topicSentences = [
+    [/\b(?:planner|plan|scheduled session|session|progression|workout)\b/, 'The next plan should reflect that automatically.'],
+    [/\b(?:completion|summary|delivery|message|report)\b/, 'The next finished job should show that automatically.'],
+    [/\b(?:import|sync)\b/, 'The next sync should reflect that automatically.'],
+  ];
+  for (const [topic, sentence] of topicSentences) {
+    if (topic.test(lowered)) return sentence;
+  }
+  return 'The next run should reflect that automatically.';
+}
+/**
+ * Build a human summary from text of the form "<lead> Technically: <tail>".
+ *
+ * @param {*} rawText - Completion text that may contain an explicit technical marker.
+ * @returns {string|null} Up to three sentences truncated to MAX_DELIVERY_CHARS, or
+ *   null when there is no marker or the lead cannot be summarized.
+ */
+function buildHumanSummaryFromMixedTechnicalText(rawText) {
+  const split = extractExplicitTechnicalTail(rawText);
+  if (!split) return null;
+  const cleanedLead = cleanLeadForHumanSummary(split.lead);
+  if (!cleanedLead) return null;
+  // Fall back through progressively simpler summarizers.
+  const leadSummary = summarizeCompletionText(cleanedLead, { skipEmbeddedObject: true })
+    || summarizeProse(cleanedLead)
+    || asSentence(cleanedLead);
+  if (!leadSummary) return null;
+
+  const sourceSideSentence = buildSourceSideHumanSentence(split.technicalTail);
+  const sentences = sourceSideSentence ? [leadSummary, sourceSideSentence] : [leadSummary];
+  // The expectation is chosen from everything said so far, not just the lead.
+  const expectation = buildMixedLeadExpectation(sentences.join(' '));
+  if (expectation) sentences.push(expectation);
+  return truncateText(sentences.join(' '), MAX_DELIVERY_CHARS);
+}
 function isTestOrValidationFragment(fragment) {
   const cleaned = normalizeCompletionText(fragment);
   if (!cleaned) return false;
@@ -578,11 +810,21 @@ export function humanizeCompletionText(value) {
   const raw = normalizeCompletionText(value);
   if (!raw) return null;
-  const summarized = summarizeCompletionText(raw);
+  const passThroughReport = getPassThroughHumanFinalReport(raw);
+  if (passThroughReport) return passThroughReport;
+  const structuredSections = extractStructuredSummarySections(raw);
+  const summarySource = normalizeCompletionText(structuredSections?.summary || raw);
+  if (!summarySource) return null;
+  const mixedSummary = buildHumanSummaryFromMixedTechnicalText(summarySource);
+  if (mixedSummary) return mixedSummary;
+  const summarized = summarizeCompletionText(summarySource);
   if (!summarized) return null;
-  if (!looksTechnicalCompletionSummary(raw, summarized)) return summarized;
+  if (!looksTechnicalCompletionSummary(summarySource, summarized)) return stripHumanSummaryLabel(summarized) || summarized;
-  return buildHumanizedTechnicalSummary(raw, summarized) || summarized;
+  return buildHumanizedTechnicalSummary(summarySource, summarized) || summarized;
 }
 function summarizeChecklistTechnicalDetails(checklist, sha) {
@@ -615,29 +857,47 @@ function buildTechnicalDetailsText({ rawText, summaryText, completion } = {}) {
   const details = getCompletionTechnicalDetails(completion);
   const parts = [];
-  const rawTechnical = raw && looksTechnicalCompletionSummary(raw, summary) && raw !== summary;
+  const rawSections = extractStructuredSummarySections(raw);
+  const splitRaw = extractExplicitTechnicalTail(raw);
+  const rawTechnicalSource = normalizeTechnicalDetailLine(rawSections?.technical || splitRaw?.technicalTail || raw);
+  const rawHasExplicitTechnical = Boolean(rawSections?.technical || splitRaw?.technicalTail);
+  const rawTechnical = Boolean(
+    rawTechnicalSource
+      && ((rawHasExplicitTechnical && rawTechnicalSource !== summary)
+        || (looksTechnicalCompletionSummary(rawTechnicalSource, summary) && rawTechnicalSource !== summary)),
+  );
   if (rawTechnical) {
-    parts.push(truncateText(cleanMarkdown(raw).replace(/\s+/g, ' ').trim(), 260));
+    parts.push(truncateText(rawTechnicalSource, 260));
   }
   let completionDetailsAreTechnical = false;
   if (typeof details === 'string') {
-    const normalized = normalizeCompletionText(details);
-    completionDetailsAreTechnical = Boolean(normalized && looksTechnicalCompletionSummary(normalized, summary));
+    const detailSections = extractStructuredSummarySections(details);
+    const splitDetails = extractExplicitTechnicalTail(details);
+    const normalized = normalizeTechnicalDetailLine(detailSections?.technical || splitDetails?.technicalTail || details);
+    completionDetailsAreTechnical = Boolean(
+      normalized && (detailSections?.technical || splitDetails?.technicalTail || looksTechnicalCompletionSummary(normalized, summary)),
+    );
     if (normalized
       && !isInternalTransportNoiseText(normalized)
       && (completionDetailsAreTechnical || rawTechnical)
-      && (!rawTechnical || normalized !== raw)) {
+      && (!rawTechnical || normalized !== rawTechnicalSource)) {
       parts.push(truncateText(normalized, 220));
     }
   } else if (details && typeof details === 'object') {
     const rawSummary = normalizeCompletionText(details.raw_summary);
-    completionDetailsAreTechnical = Boolean(rawSummary && looksTechnicalCompletionSummary(rawSummary, summary));
-    if (rawSummary
-      && !isInternalTransportNoiseText(rawSummary)
+    const detailSummarySections = extractStructuredSummarySections(rawSummary);
+    const splitDetailSummary = extractExplicitTechnicalTail(rawSummary);
+    const technicalSummary = normalizeTechnicalDetailLine(detailSummarySections?.technical || splitDetailSummary?.technicalTail || rawSummary);
+    completionDetailsAreTechnical = Boolean(
+      technicalSummary && (detailSummarySections?.technical || splitDetailSummary?.technicalTail || looksTechnicalCompletionSummary(technicalSummary, summary)),
+    );
+    if (technicalSummary
+      && !isInternalTransportNoiseText(technicalSummary)
       && (completionDetailsAreTechnical || rawTechnical)
-      && (!rawTechnical || rawSummary !== raw)) {
-      parts.push(truncateText(cleanMarkdown(rawSummary).replace(/\s+/g, ' ').trim(), 220));
+      && (!rawTechnical || technicalSummary !== rawTechnicalSource)) {
+      parts.push(truncateText(technicalSummary, 220));
     }
   }
@@ -647,7 +907,7 @@ function buildTechnicalDetailsText({ rawText, summaryText, completion } = {}) {
   const unique = [];
   const seen = new Set();
   for (const part of parts) {
-    const normalized = normalizeCompletionText(part);
+    const normalized = normalizeTechnicalDetailLine(part) || normalizeCompletionText(part);
     if (!normalized) continue;
     const key = normalized.toLowerCase();
     if (seen.has(key)) continue;
@@ -659,22 +919,39 @@ function buildTechnicalDetailsText({ rawText, summaryText, completion } = {}) {
 }
 function composeDeliveryText(summaryText, technicalDetailsText = null) {
-  const summary = normalizeCompletionText(summaryText);
+  const summarySections = extractStructuredSummarySections(summaryText);
+  const summary = stripHumanSummaryLabel(summarySections?.summary || summaryText);
   if (!summary) return null;
-  const technicalLines = Array.isArray(technicalDetailsText)
-    ? technicalDetailsText.map(line => normalizeCompletionText(line)).filter(Boolean)
-    : [];
+  const technicalCandidates = [];
+  if (summarySections?.technical) technicalCandidates.push(summarySections.technical);
+  if (Array.isArray(technicalDetailsText)) technicalCandidates.push(...technicalDetailsText);
+  else if (technicalDetailsText != null) technicalCandidates.push(technicalDetailsText);
+  const technicalLines = [];
+  const seen = new Set();
+  for (const candidate of technicalCandidates) {
+    const normalized = normalizeTechnicalDetailLine(candidate);
+    if (!normalized) continue;
+    const key = normalized.toLowerCase();
+    if (seen.has(key)) continue;
+    seen.add(key);
+    technicalLines.push(normalized);
+  }
   if (technicalLines.length > 0) {
     return `${summary}\n\nTechnical details:\n- ${technicalLines.join('\n- ')}`;
   }
-  const technical = normalizeCompletionText(technicalDetailsText);
-  return technical ? `${summary}\n\nTechnical details:\n- ${technical}` : summary;
+  return summary;
 }
 export function summarizeCompletionText(value, { skipEmbeddedObject = false } = {}) {
   const raw = normalizeCompletionText(value);
   if (!raw) return null;
+  const passThroughReport = getPassThroughHumanFinalReport(raw);
+  if (passThroughReport) return passThroughReport;
   if (!skipEmbeddedObject) {
     const parsed = extractEmbeddedCompletionObject(raw);
     if (parsed !== null) {

package/dispatch/index.mjs CHANGED Viewed

@@ -205,6 +205,19 @@ function setLabel(name, data) {
   return labels[name];
 }
+/**
+ * Merge `data` into a label entry, mark it done, and drop any stale error.
+ *
+ * @param {string} name - Label name.
+ * @param {object} data - Fields to merge into the existing entry.
+ * @returns {object} The updated entry, read from the value mutateLabels returns.
+ */
+function setLabelDone(name, data) {
+  const labels = mutateLabels((current) => {
+    const entry = {
+      ...current[name],
+      ...data,
+      status: 'done',
+      updatedAt: new Date().toISOString(),
+    };
+    // A done label must not keep an error left over from an earlier failed state.
+    delete entry.error;
+    current[name] = entry;
+  });
+  return labels[name];
+}
 // -- Gateway Calls --------------------------------------------
 /**
@@ -352,7 +365,17 @@ function getSessionJsonlPath(agent = 'main', sessionId) {
 function inspectSessionActivitySignal(sessionKey, sessionsStore) {
   if (!sessionKey || !sessionsStore?.[sessionKey]) {
-    return { found: false, hasActivitySignal: false, messageCount: null, jsonlExists: false, hasTokens: false, updatedAtMs: null };
+    return {
+      found: false,
+      hasStartedSignal: false,
+      hasActivitySignal: false,
+      messageCount: null,
+      jsonlExists: false,
+      hasTokens: false,
+      updatedAtMs: null,
+      sessionStartedAtMs: null,
+      sessionId: null,
+    };
   }
   const agent = agentFromSessionKey(sessionKey) || 'main';
@@ -360,6 +383,9 @@ function inspectSessionActivitySignal(sessionKey, sessionsStore) {
   const jsonlPath = getSessionJsonlPath(agent, entry.sessionId);
   const jsonlExists = jsonlPath ? existsSync(jsonlPath) : false;
   const hasTokens = typeof entry.totalTokens === 'number' && entry.totalTokens > 0;
+  const sessionStartedAtMs = toTimestampMs(entry.sessionStartedAt || entry.startedAt);
+  const updatedAtMs = toTimestampMs(entry.updatedAt);
+  const hasStartedSignal = Boolean(entry.sessionId) || sessionStartedAtMs !== null || updatedAtMs !== null;
   let messageCount = null;
   try {
@@ -371,11 +397,14 @@ function inspectSessionActivitySignal(sessionKey, sessionsStore) {
   return {
     found: true,
+    hasStartedSignal,
     hasActivitySignal: jsonlExists || hasTokens || (typeof messageCount === 'number' && messageCount > 0),
     messageCount,
     jsonlExists,
     hasTokens,
-    updatedAtMs: toTimestampMs(entry.updatedAt),
+    updatedAtMs,
+    sessionStartedAtMs,
+    sessionId: entry.sessionId || null,
   };
 }
@@ -385,12 +414,7 @@ function inspectSessionBootstrapFailure(sessionKey, sessionsStore, spawnedAtMs,
   }
   const ageMs = spawnedAtMs ? Date.now() - spawnedAtMs : Infinity;
-  if (ageMs < startupGraceMs || ageMs > startupGraceMs * 2) {
-    return { shouldResolve: false, reason: null, errorMsg: null };
-  }
-  const signal = inspectSessionActivitySignal(sessionKey, sessionsStore);
-  if (signal.hasActivitySignal) {
+  if (ageMs < startupGraceMs) {
     return { shouldResolve: false, reason: null, errorMsg: null };
   }
@@ -403,22 +427,10 @@ function inspectSessionBootstrapFailure(sessionKey, sessionsStore, spawnedAtMs,
     };
   }
-  if (signal.messageCount === 0) {
-    return {
-      shouldResolve: true,
-      reason: 'session entered sessions store but never wrote transcript/history',
-      errorMsg: 'spawn-failure: session entered sessions store but never wrote transcript/history',
-    };
-  }
-  if (signal.updatedAtMs !== null && spawnedAtMs && signal.updatedAtMs <= spawnedAtMs + 5000) {
-    return {
-      shouldResolve: true,
-      reason: 'session entered sessions store but never showed any activity',
-      errorMsg: 'spawn-failure: session entered sessions store but never showed any activity',
-    };
-  }
+  // A Codex session can enter the sessions store before chat.history, JSONL, or
+  // token counters are written. Treat that as "still booting"; the watcher and
+  // job timeout own later failure handling. Only fail fast when the gateway has
+  // recorded an explicit lane error above.
   return { shouldResolve: false, reason: null, errorMsg: null };
 }
@@ -683,7 +695,7 @@ function quoteForSingleQuotedShell(value) {
 }
 /**
- * Schedule a one-shot delivery watcher shell job for a dispatch label.
+ * Schedule a quick-poll delivery watcher shell job for a dispatch label.
  * Used both for the initial watcher registration and SIGTERM handoffs.
  */
 function scheduleDeliveryWatcherJob({
@@ -704,13 +716,19 @@ function scheduleDeliveryWatcherJob({
   const watcherTimeoutS = Number(timeoutSeconds) + 120;
   const idleThresholdS = Number(idleThresholdSeconds) || 300;
   const sq = quoteForSingleQuotedShell;
-  const watcherCmd = `DISPATCH_LABELS_PATH='${sq(LABELS_PATH)}' '${sq(process.execPath)}' '${sq(watcherPath)}' --label '${sq(label)}' --timeout ${watcherTimeoutS} --poll-interval 20 --idle-threshold ${idleThresholdS}`;
+  const watcherCmd =
+    `DISPATCH_LABELS_PATH='${sq(LABELS_PATH)}' ` +
+    `DISPATCH_INDEX_PATH='${sq(join(__dirname, 'index.mjs'))}' ` +
+    `'${sq(process.execPath)}' '${sq(watcherPath)}' ` +
+    `--label '${sq(label)}' --timeout ${watcherTimeoutS} ` +
+    `--poll-interval 20 --idle-threshold ${idleThresholdS} --once`;
   const nowUtc = new Date().toISOString().replace('T', ' ').slice(0, 19);
   const jobSpec = {
     name:                     `${agentBrand}-deliver:${label}${nameSuffix}`,
-    schedule_kind:            'at',
-    schedule_at:              nowUtc,
+    schedule_kind:            'cron',
+    schedule_cron:            config.deliver_watcher_cron || '* * * * *',
+    next_run_at:              nowUtc,
     session_target:           'shell',
     payload_kind:             'shellCommand',
     payload_message:          watcherCmd,
@@ -720,8 +738,7 @@ function scheduleDeliveryWatcherJob({
     delivery_guarantee:       'at-least-once',
     ttl_hours:                config.deliver_watcher_ttl_hours ?? 48,
     overlap_policy:           'skip',
-    run_timeout_ms:           Math.max(watcherTimeoutS, 4 * 3600) * 1000
-                              + 420 * 1000,
+    run_timeout_ms:           120_000,
     delete_after_run:         1,
     origin:                   origin || 'system',
   };
@@ -1088,9 +1105,10 @@ async function cmdEnqueue(flags) {
     }
     // -- Register scheduler watcher for delivery ---------------
-    // Creates a one-shot shell job that runs watcher.mjs (blocks until session
-    // completes, outputs result). The scheduler's handleDelivery delivers with
-    // retry, alias resolution, and audit trail in scheduler.db.
+    // Creates a quick-poll shell job that runs watcher.mjs once per tick. Empty
+    // stdout means "still running" and advances the next tick without delivery.
+    // Terminal stdout goes through the scheduler's handleDelivery with retry,
+    // alias resolution, and audit trail in scheduler.db.
     // The watcher is the only final-delivery path for dispatched jobs.
     const sq = s => String(s).replace(/'/g, "'\\''");
     let schedulerWatcherOk = false;
@@ -1204,9 +1222,10 @@ async function cmdEnqueue(flags) {
     // -- Post-spawn verification (Fix 3) --------------------------------
     // Canary: poll sessions.json up to 3 times at 10s intervals to confirm the
-    // session appeared in the store. Non-fatal -- output is already written above.
-    // If the session never shows up, stderr gets a loud warning and ledger status
-    // is set to 'spawn-warning'. The watcher provides the definitive error path.
+    // session appeared in the store. A session store entry with sessionId or
+    // startedAt/sessionStartedAt is enough: long first turns may not flush JSONL,
+    // token counts, or chat.history until the model call completes. The delivery
+    // watcher owns later completion/failure handling.
     const SPAWN_POLL_MAX = 3;
     const SPAWN_POLL_DELAY_MS = 10_000;
     let spawnConfirmed = false;
@@ -1214,7 +1233,7 @@ async function cmdEnqueue(flags) {
       await sleep(SPAWN_POLL_DELAY_MS);
       const spawnStore = readSessionsStore(agent);
       const signal = inspectSessionActivitySignal(sessionKey, spawnStore);
-      if (signal.hasActivitySignal) {
+      if (signal.hasStartedSignal || signal.hasActivitySignal) {
         spawnConfirmed = true;
         break;
       }
@@ -1972,7 +1991,7 @@ async function cmdDone(flags) {
     // Label was never registered (e.g. direct subagent spawn, not via enqueue).
     // This is not an error -- the work completed, the label just wasn't tracked.
     process.stderr.write(`[${BRAND}] warn: no session found for label "${label}" -- registering as done\n`);
-    setLabel(label, { status: 'done', summary, completion, ...(sha ? { sha } : {}) });
+    setLabelDone(label, { summary, completion, ...(sha ? { sha } : {}) });
     // No watcher is polling for this label, so actively notify via the gateway
     // post office using delivery config from config.json as fallback target.
@@ -2001,8 +2020,7 @@ async function cmdDone(flags) {
     return;
   }
-  setLabel(label, {
-    status:  'done',
+  setLabelDone(label, {
     summary,
     completion,
     ...(sha ? { sha } : {}),

package/dispatch/watcher.mjs CHANGED Viewed

@@ -684,6 +684,112 @@ function getJsonlMidTurnReason(sessionId, agentDir = 'main') {
   return null; // Last assistant entry appears to be a complete text reply -- safe to proceed
 }
+/**
+ * Inspect the tail of the session JSONL for a tool handoff that is still in
+ * flight, regardless of how recently the file was written. A long-running
+ * tool call can leave the transcript untouched for minutes, so a stale mtime
+ * by itself does not prove the agent is stuck.
+ *
+ * @param {string} sessionId - Internal session UUID
+ * @param {string} agentDir - Agent directory (default: 'main')
+ * @returns {string|null} reason string if a tool handoff appears pending
+ */
+function getJsonlPendingToolReason(sessionId, agentDir = 'main') {
+  const tail = readJsonlLastLines(sessionId, agentDir, 3);
+  if (!tail || tail.length === 0) return null;
+  const [last] = tail.slice(-1);
+  const blocks = Array.isArray(last?.content) ? last.content : [];
+  const awaitingReply = 'last entry is tool_result (tool executed, awaiting assistant reply)';
+
+  if (last?.role === 'assistant') {
+    const pendingCall = blocks.find(c => c?.type === 'tool_use');
+    if (pendingCall) {
+      return `last assistant entry has tool_use (${pendingCall.name || 'unknown'}) -- awaiting tool result`;
+    }
+    if (last.type === 'tool_use') {
+      return `last entry is tool_use (${last.name || 'unknown'}) -- awaiting tool result`;
+    }
+  }
+
+  // A tool result may arrive as a user content block or as an entry-level type.
+  const userCarriesResult = last?.role === 'user' && blocks.some(c => c?.type === 'tool_result');
+  if (userCarriesResult || last?.type === 'tool_result') return awaitingReply;
+
+  return null;
+}
+/**
+ * Coerce a timestamp-like value to epoch milliseconds.
+ *
+ * @param {number|Date|string|null|undefined} value - Number, Date, or date string.
+ * @returns {number|null} Finite epoch milliseconds, or null for falsy input
+ *   (including 0) and for values that do not yield a finite timestamp.
+ */
+function parseTimestampMs(value) {
+  if (!value) return null;
+  let ms;
+  if (typeof value === 'number') {
+    ms = value;
+  } else if (value instanceof Date) {
+    ms = value.getTime();
+  } else {
+    ms = Date.parse(value);
+  }
+  return Number.isFinite(ms) ? ms : null;
+}
+/**
+ * Report when an agent session has gone quiet even though the delivery
+ * watcher itself keeps running and refreshing lastPing.
+ *
+ * This covers the case where OpenClaw's Codex app-server retires a timed-out
+ * turn while dispatch status still says "running" only because the watcher
+ * is polling.
+ *
+ * @param {object} status - Dispatch status; reads sessionKey, label and liveness.ageMs.
+ * @param {number} thresholdMs - Idle time after which the session counts as stalled.
+ * @returns {string|null} A stall description, or null when the session is unknown,
+ *   recently active, or waiting on a tool handoff.
+ */
+function getRunningSessionStallReason(status, thresholdMs) {
+  const sessionKey = status?.sessionKey;
+  if (!sessionKey) return null;
+
+  const sessionAgent = sessionKey.split(':')[1] || 'main';
+  const entry = getSessionStoreEntry(sessionKey);
+  if (!entry) return null;
+
+  const sessionId = entry.sessionId || null;
+  const now = Date.now();
+
+  // Collect every usable activity timestamp: store fields first, then the JSONL mtime.
+  const activityTimes = [];
+  for (const field of ['updatedAt', 'lastActivityAt', 'sessionStartedAt', 'startedAt']) {
+    const ts = parseTimestampMs(entry[field]);
+    if (typeof ts === 'number') activityTimes.push(ts);
+  }
+  const jsonlMtime = sessionId ? getSessionJsonlMtime(sessionId, sessionAgent) : null;
+  if (typeof jsonlMtime === 'number') activityTimes.push(jsonlMtime);
+
+  const livenessAgeMs = status?.liveness?.ageMs;
+  if (typeof livenessAgeMs === 'number' && livenessAgeMs < thresholdMs) {
+    return null;
+  }
+
+  const lastActivityMs = activityTimes.length ? Math.max(...activityTimes) : null;
+  if (lastActivityMs !== null && now - lastActivityMs < thresholdMs) {
+    return null;
+  }
+
+  // Stale telemetry with a tool handoff in flight is treated as still working.
+  const pendingToolReason = sessionId ? getJsonlPendingToolReason(sessionId, sessionAgent) : null;
+  if (pendingToolReason) {
+    process.stderr.write(
+      `[watcher] ${status.label || 'session'} stale telemetry but pending tool handoff detected: ${pendingToolReason}\n`
+    );
+    return null;
+  }
+
+  let idleMinutes;
+  if (lastActivityMs === null) {
+    idleMinutes = Math.ceil(thresholdMs / 60000);
+  } else {
+    idleMinutes = Math.max(1, Math.floor((now - lastActivityMs) / 60000));
+  }
+
+  return (
+    `agent session stalled: no session/jsonl activity for ~${idleMinutes}min ` +
+    `while delivery watcher remained alive; likely app-server turn retired or stopped producing events`
+  );
+}
 /**
  * Read the last assistant entry's stop_reason from the session JSONL.
  * Returns the stop_reason string (e.g. 'end_turn', 'tool_use') or null if unavailable.
@@ -754,6 +860,7 @@ function markLabelError(label, errorSummary) {
     updateExistingLabel(label, (entry) => {
       if (entry.status === 'done') return false;
       entry.status = 'error';
+      entry.error = errorSummary || 'failed without result';
       entry.summary = errorSummary || 'failed without result';
     });
   } catch (e) {
@@ -761,6 +868,8 @@ function markLabelError(label, errorSummary) {
   }
 }
+let exitZeroOnTerminal = false;
 /**
  * Format and output the delivery message, then exit 0.
  * Also marks the label as done in labels.json before exiting.
@@ -794,7 +903,7 @@ function deliverResult(label, lastReply, fallbackSummary, completionPayload = nu
           `**Error:** ${stderr || 'non-zero exit'}\n\n` +
           `Job marked as \`error\`. The agent may have reported done without completing the actual work.\n`
         );
-        process.exit(1);
+        process.exit(exitZeroOnTerminal ? 0 : 1);
       }
     }
   } catch (loadErr) {
@@ -816,10 +925,17 @@ function deliverResult(label, lastReply, fallbackSummary, completionPayload = nu
       ? completion.deliveryText.slice(0, maxLen) + '\n\n..[truncated]'
       : completion.deliveryText;
     process.stdout.write(`🌶️ *dispatch* [${label}] completed:\n\n${reply}\n`);
-  } else {
-    process.stderr.write(`[watcher] [${label}] completion delivery suppressed (no meaningful reply or summary)\n`);
+    process.exit(0);
   }
-  process.exit(0);
+  const failureSummary = 'completed without a clean user-facing completion';
+  process.stderr.write(`[watcher] [${label}] completion delivery suppressed (no meaningful reply or summary)\n`);
+  markLabelError(label, failureSummary);
+  process.stdout.write(
+    `⚠️ dispatch [${label}] completed, but no clean user-facing completion was captured. ` +
+    `Internal diagnostics were suppressed; check scheduler run logs for details.\n`
+  );
+  process.exit(exitZeroOnTerminal ? 0 : 1);
 }
 function emitInterruptedOutcome(label, summary, result = null) {
@@ -829,12 +945,12 @@ function emitInterruptedOutcome(label, summary, result = null) {
     `⚠️ dispatch [${label}] session went idle before completing -- work may be incomplete` +
     `${formatDiagnosticSnippet(result?.diagnosticReply || result?.lastReply || null)}\n`
   );
-  process.exit(1);
+  process.exit(exitZeroOnTerminal ? 0 : 1);
 }
 function emitTimeoutOutcome(label, message, result = null) {
   process.stdout.write(`${message}${formatDiagnosticSnippet(result?.diagnosticReply || result?.lastReply || null)}\n`);
-  process.exit(1);
+  process.exit(exitZeroOnTerminal ? 0 : 1);
 }
 // -- Watcher heartbeat interval ref --------------------------------------
@@ -869,6 +985,8 @@ const flags = parseFlags(process.argv.slice(2));
 const label       = flags.label;
 const timeoutS    = parseInt(flags.timeout || '600', 10);
 const pollS       = parseInt(flags['poll-interval'] || '20', 10);
+const once        = flags.once === true || flags.once === 'true';
+exitZeroOnTerminal = once;
 // How long a session must be idle before we proactively check result
 const IDLE_RESULT_CHECK_MS = 60000;
@@ -878,6 +996,144 @@ if (!label) {
   process.exit(2);
 }
+function touchWatcherPing(label) {
+  updateExistingLabel(label, (entry) => {
+    if (entry.status !== 'running') return false;
+    entry.lastPing = new Date().toISOString();
+  });
+}
+function markWatcherPending(label, reason = 'target still running') {
+  process.stderr.write(`[watcher] WATCHER_PENDING label=${label} reason=${reason}\n`);
+  process.exit(0);
+}
+function clearWatcherRetryAfter(label) {
+  updateExistingLabel(label, (entry) => {
+    if (!entry.watcherRetryAfter) return false;
+    delete entry.watcherRetryAfter;
+  });
+}
+function handleOnce529(label, errorMsg) {
+  const labels = loadLabels();
+  const entry = labels[label] || {};
+  const retryCount = getRetryCount(label);
+  if (retryCount >= MAX_529_RETRIES) {
+    markLabelError(label, `max_retries_exceeded (${retryCount}x 529): ${errorMsg}`);
+    process.stdout.write(
+      `🌶️ *dispatch* [${label}] failed after ${MAX_529_RETRIES} retries (529 overload)\n` +
+      `Error: ${errorMsg}\n`
+    );
+    process.exit(0);
+  }
+  const retryAfterMs = parseTimestampMs(entry.watcherRetryAfter);
+  if (!retryAfterMs) {
+    const retryResult = attempt529Retry(label, retryCount, errorMsg);
+    if (!retryResult.retry) return handleOnce529(label, errorMsg);
+    updateExistingLabel(label, (current) => {
+      current.watcherRetryAfter = new Date(Date.now() + retryResult.delayMs).toISOString();
+    });
+    markWatcherPending(label, `529 retry scheduled for future tick (${retryResult.delayMs / 1000}s)`);
+  }
+  if (Date.now() < retryAfterMs) {
+    markWatcherPending(label, '529 retry backoff active');
+  }
+  if (respawnSession(label)) {
+    clearWatcherRetryAfter(label);
+    markWatcherPending(label, '529 retry dispatched');
+  }
+  markLabelError(label, `529 retry failed -- could not respawn session: ${errorMsg}`);
+  process.stdout.write(
+    `🌶️ *dispatch* [${label}] 529 retry failed -- could not respawn session\n` +
+    `Error: ${errorMsg}\n`
+  );
+  process.exit(0);
+}
+function runOnceAndExit() {
+  try {
+    touchWatcherPing(label);
+  } catch {
+    // Best-effort -- a quick-poll tick must not fail because heartbeat metadata raced.
+  }
+  const status = dispatch('status', ['--label', label]);
+  if (!status?.ok) {
+    markWatcherPending(label, 'status unavailable');
+  }
+  if (status.status === 'error') {
+    const errorMsg = status.error || status.summary || '';
+    if (is529Error(errorMsg)) {
+      handleOnce529(label, errorMsg);
+    }
+  }
+  if (status.status !== 'running') {
+    const terminalResult = dispatch('result', ['--label', label]);
+    const terminalCompletion = terminalResult?.completion || status?.completion || null;
+    if (status.status === 'done') {
+      const currentRetryCount = getRetryCount(label);
+      if (currentRetryCount > 0) setRetryCount(label, 0);
+      const gwRetryCount = getGwRestartRetryCount(label);
+      if (gwRetryCount > 0) setGwRestartRetryCount(label, 0);
+      deliverResult(label, terminalResult?.lastReply, status.summary, terminalCompletion);
+    }
+    if (status.status === 'interrupted') {
+      emitInterruptedOutcome(label, status.summary, terminalResult);
+    }
+    const summary = status.error || status.summary || `terminal failure (${status.status || 'unknown'})`;
+    markLabelError(label, summary);
+    process.stdout.write(`🌶️ *dispatch* [${label}] failed\nSummary: ${summary}\n`);
+    process.exit(0);
+  }
+  if (status.sessionKey) {
+    const entry = getSessionStoreEntry(status.sessionKey);
+    const sessionId = entry?.sessionId || null;
+    const sessionAgent = status.sessionKey.split(':')[1] || 'main';
+    const terminalJsonlReply = sessionId ? getSessionTerminalReply(sessionId, sessionAgent) : null;
+    if (sessionId && terminalJsonlReply && isSessionCleanlyFinished(sessionId, sessionAgent)) {
+      const result = dispatch('result', ['--label', label]);
+      deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
+    }
+  }
+  const ageMs = status.liveness?.ageMs;
+  if (ageMs != null && ageMs >= IDLE_RESULT_CHECK_MS) {
+    const result = dispatch('result', ['--label', label]);
+    if (result?.lastReply || hasCompletionSignal(result?.completion)) {
+      deliverResult(label, result?.lastReply || null, null, result?.completion || null);
+    }
+    const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
+    if (stallReason) {
+      process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
+      markLabelError(label, stallReason);
+      process.stdout.write(
+        `❌ *dispatch* [${label}] failed\n` +
+        `Summary: ${stallReason}\n`
+      );
+      process.exit(0);
+    }
+  }
+  markWatcherPending(label);
+}
+if (once) {
+  runOnceAndExit();
+}
 // -- Start heartbeat -----------------------------------------------------
 // Write lastPing to labels.json every PING_INTERVAL_MS while the session is
 // still running. The watchdog guard in index.mjs reads lastPing to know this
@@ -1238,6 +1494,17 @@ while (Date.now() < deadline) {
     if (result?.lastReply || hasCompletionSignal(result?.completion)) {
       deliverResult(label, result?.lastReply || null, null, result?.completion || null);
     }
+    const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
+    if (stallReason) {
+      process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
+      markLabelError(label, stallReason);
+      process.stdout.write(
+        `❌ *dispatch* [${label}] failed\n` +
+        `Summary: ${stallReason}\n`
+      );
+      process.exit(1);
+    }
   }

package/dispatcher-strategies.js CHANGED Viewed

@@ -1095,6 +1095,25 @@ export async function executeMain(job, ctx, deps) {
 // -- Strategy: Shell -----------------------------------------
+function isCompletionDeliveryWatcherJob(job) {
+  return /^(?:dispatch|chilisaus)-deliver:/.test(String(job?.name || ''));
+}
+function isCompletionWatcherPendingTick(shellResult) {
+  return !(shellResult.stdout || '').trim()
+    && /\bWATCHER_PENDING\b/.test(shellResult.stderr || '');
+}
+function buildCompletionWatcherNoPayloadMessage(job, shellResult) {
+  const statusLabel = shellResult.status === 'ok'
+    ? 'completed without a deliverable result'
+    : `failed before producing a deliverable result${shellResult.errorMessage ? ` (${shellResult.errorMessage})` : ''}`;
+  return [
+    `⚠️ Completion delivery watcher for ${job.name} ${statusLabel}.`,
+    'No internal diagnostics were delivered as the completion message; check the scheduler run logs for stderr/details.',
+  ].join('\n');
+}
 export async function executeShell(job, ctx, deps) {
   const { runShellCommand, normalizeShellResult, log } = deps;
   const result = makeDefaultResult();
@@ -1129,18 +1148,61 @@ export async function executeShell(job, ctx, deps) {
     shell_stderr_bytes: shellResult.stderrBytes,
   };
-  // Shell delivery logic: announce-always sends on all results, announce sends on error only
-  const announcePayload = shellResult.deliveryText.trim() ? shellResult.deliveryText : shellResult.errorMessage;
-  if (job.delivery_mode === 'announce-always' && announcePayload) {
-    const prefix = shellResult.status === 'ok' ? '' : `\u26a0\ufe0f Shell job failed: ${job.name}\n\n`;
-    result.deliveryOverride = `${prefix}${announcePayload}`;
-  } else if (job.delivery_mode === 'announce' && shellResult.status !== 'ok' && announcePayload) {
-    result.deliveryOverride = announcePayload;
+  if (isCompletionDeliveryWatcherJob(job)) {
+    const watcherStdout = (shellResult.stdout || '').trim();
+    const watcherStderr = (shellResult.stderr || '').trim();
+    if (isCompletionWatcherPendingTick(shellResult)) {
+      result.status = 'skipped';
+      result.summary = 'Completion delivery watcher pending; target session is still running';
+      result.content = '';
+      result.errorMessage = null;
+      result.idemAction = 'release';
+      result.skipDelivery = true;
+    } else if (watcherStdout) {
+      // Completion watcher stdout is the only user-facing contract.  Stderr is
+      // diagnostics-only and must never be repackaged as a "successful" final
+      // completion if the watcher suppressed the real payload.
+      result.summary = watcherStdout;
+      result.content = watcherStdout;
+      if (['announce', 'announce-always'].includes(job.delivery_mode)) {
+        result.deliveryOverride = watcherStdout;
+      } else {
+        result.skipDelivery = true;
+      }
+    } else {
+      const noPayloadMessage = buildCompletionWatcherNoPayloadMessage(job, shellResult);
+      result.status = 'error';
+      result.summary = noPayloadMessage;
+      result.errorMessage = 'Completion delivery watcher produced no user-facing stdout payload';
+      result.content = noPayloadMessage;
+      if (['announce', 'announce-always'].includes(job.delivery_mode)) {
+        result.deliveryOverride = noPayloadMessage;
+      } else {
+        result.skipDelivery = true;
+      }
+      log('warn', `Completion watcher produced no deliverable stdout: ${job.name}`, {
+        runId: ctx.run.id,
+        shellStatus: shellResult.status,
+        exitCode: shellResult.exitCode,
+        stderrExcerpt: watcherStderr.slice(0, 500),
+        skippedOrDisabled: /\b(?:skipped|disabled)\b/i.test(watcherStderr),
+      });
+    }
   } else {
-    result.skipDelivery = true;
+    // Shell delivery logic: announce-always sends on all results, announce sends on error only
+    const announcePayload = shellResult.deliveryText.trim() ? shellResult.deliveryText : shellResult.errorMessage;
+    if (job.delivery_mode === 'announce-always' && announcePayload) {
+      const prefix = shellResult.status === 'ok' ? '' : `\u26a0\ufe0f Shell job failed: ${job.name}\n\n`;
+      result.deliveryOverride = `${prefix}${announcePayload}`;
+    } else if (job.delivery_mode === 'announce' && shellResult.status !== 'ok' && announcePayload) {
+      result.deliveryOverride = announcePayload;
+    } else {
+      result.skipDelivery = true;
+    }
   }
-  log('info', `Shell ${shellResult.status}: ${job.name}`, {
+  log('info', `Shell ${result.status}: ${job.name}`, {
     runId: ctx.run.id,
     exitCode: shellResult.exitCode,
     signal: shellResult.signal,
@@ -1156,11 +1218,16 @@ export async function executeAgent(job, ctx, deps) {
   const {
     waitForGateway, updateRunSession, setAgentStatus,
     buildJobPrompt, runAgentTurnWithActivityTimeout,
+    // Sanctioned isolated dispatch primitive. Falls back to the activity-aware
+    // runner when callers (e.g. tests) wire only the older name -- both helpers
+    // share the same HTTP-only contract, no subprocess spawn.
+    runIsolatedAgentTurn,
     updateContextSummary, releaseDispatch, releaseIdempotencyKey,
     updateJob, matchesSentinel, detectTransientError,
     listSessions,
     sqliteNow, log,
   } = deps;
+  const dispatchAgentTurn = runIsolatedAgentTurn || runAgentTurnWithActivityTimeout;
   const result = makeDefaultResult();
   // Gateway health check
@@ -1254,7 +1321,12 @@ export async function executeAgent(job, ctx, deps) {
     }
   }
-  const turnResult = await runAgentTurnWithActivityTimeout({
+  // Isolated dispatch primitive: HTTP-only chat completions call. The
+  // scheduler must never fork a sibling `openclaw` process to spawn an
+  // isolated session -- that variant has historically SIGTERM'd the
+  // launchd-tracked gateway parent and orphaned a node process on port
+  // 18789 (see ISOLATED_DISPATCH_PRIMITIVE in gateway.js).
+  const turnResult = await dispatchAgentTurn({
     message: prompt,
     agentId: job.agent_id || 'main',
     sessionKey,

package/dispatcher.js CHANGED Viewed

@@ -51,7 +51,8 @@ import {
 import { buildRetrievalContext } from './retrieval.js';
 import { upsertAgent, setAgentStatus } from './agents.js';
 import {
-  runAgentTurnWithActivityTimeout, sendSystemEvent, getAllSubAgentSessions, listSessions,
+  runAgentTurnWithActivityTimeout, runIsolatedAgentTurn,
+  sendSystemEvent, getAllSubAgentSessions, listSessions,
   deliverMessage, checkGatewayHealth, waitForGateway, resolveDeliveryAlias,
   applyAuthProfileToSessionStore,
   syncAuthStoreToSession,
@@ -306,6 +307,10 @@ function buildDispatchDeps() {
     // Agent
     waitForGateway, updateRunSession, setAgentStatus,
     buildJobPrompt, runAgentTurnWithActivityTimeout,
+    // Isolated cron-dispatch primitive: HTTP-only wrapper around the
+    // chat-completions API; never forks a sibling openclaw process that
+    // could SIGTERM the launchd-tracked gateway parent.
+    runIsolatedAgentTurn,
     updateContextSummary, releaseIdempotencyKey,
     matchesSentinel, detectTransientError,
     listSessions,

package/gateway.js CHANGED Viewed

@@ -9,6 +9,22 @@ const GATEWAY_URL = process.env.OPENCLAW_GATEWAY_URL || 'http://127.0.0.1:18789'
 const HOME_DIR = process.env.HOME || homedir();
 export const TELEGRAM_MAX_MESSAGE_LENGTH = 4096;
+// -- Isolated dispatch primitive contract --------------------
+//
+// Cron jobs with session_target=isolated must reach the gateway via the
+// public HTTP API only. Forking a sibling `openclaw` process to spawn the
+// session is rejected: in production that primitive has SIGTERM'd the
+// launchd-tracked gateway parent (the child inherits the parent's listening
+// socket on port 18789 and the parent dies), leaving an orphan node process
+// holding the port. See rh-bot.lan zombie-cascade incident report.
+//
+// runIsolatedAgentTurn is the only sanctioned dispatch primitive for
+// session_target=isolated cron jobs. It MUST NOT spawn, fork, or exec any
+// child process. Any future change that needs subprocess execution belongs
+// behind a different, explicitly-named helper so reviewers can keep this
+// contract intact.
+export const ISOLATED_DISPATCH_PRIMITIVE = 'http-chat-completions';
 let _cachedToken;
 let _tokenLoaded = false;
@@ -246,6 +262,29 @@ export async function runAgentTurnWithActivityTimeout(opts) {
   }
 }
+// -- Isolated dispatch primitive -----------------------------
+/**
+ * Sanctioned dispatch primitive for session_target=isolated cron jobs.
+ *
+ * This is a thin wrapper around runAgentTurnWithActivityTimeout that names
+ * the contract: HTTP-only request to the gateway, no child process spawn.
+ * The scheduler routes every session_target=isolated job through this
+ * helper so the no-fork invariant is reviewable at one call site and
+ * testable in isolation (see the no-subprocess regression test in test.js).
+ *
+ * Why a named wrapper instead of calling runAgentTurnWithActivityTimeout
+ * directly: the dispatch primitive is the load-bearing surface that the
+ * rh-bot.lan zombie-on-port outage cascaded through. A named entry point
+ * gives operators and reviewers a single grep target ("runIsolatedAgentTurn")
+ * to audit the no-spawn invariant.
+ *
+ * Accepts the same options as runAgentTurnWithActivityTimeout.
+ */
+export async function runIsolatedAgentTurn(opts) {
+  return await runAgentTurnWithActivityTimeout(opts);
+}
 // -- System Events (main session) ----------------------------
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "openclaw-scheduler",
-  "version": "0.2.5",
+  "version": "0.2.6",
   "description": "SQLite-backed job scheduler and workflow engine for OpenClaw agents",
   "type": "module",
   "main": "./index.js",