openclaw-scheduler 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -291,8 +291,8 @@ No manual token configuration needed on a standard OpenClaw install.
291
291
 
292
292
  When `--deliver-to` is set, dispatch registers a **scheduler watcher job**
293
293
  after dispatching the session. The watcher polls the session result every
294
- minute until the agent produces a reply, then delivers via the scheduler's
295
- `handleDelivery` pipeline.
294
+ minute until the agent sends the structured `done` completion signal, then
295
+ delivers via the scheduler's `handleDelivery` pipeline.
296
296
 
297
297
  ```
298
298
  dispatch enqueue --deliver-to <telegram-user-id>
@@ -316,6 +316,20 @@ dispatch enqueue --deliver-to <telegram-user-id>
316
316
  Exit 1 with no output = retry on next cron tick (no spam — `announce-always`
317
317
  only delivers when `output.trim()` is truthy).
318
318
 
319
+ Quiet sessions are treated conservatively. The watcher does not mark a running
320
+ job failed just because `sessions.json` or the JSONL transcript has been quiet
321
+ for 60 seconds. For high/xhigh reasoning work, the first idle result probe waits
322
+ at least 10 minutes, idle auto-resolution waits at least 20 minutes, and the hard
323
+ failure ceiling is longer than the requested task timeout. Missing or ambiguous
324
+ gateway/session liveness fails open to "still monitoring" until the hard timeout
325
+ window or a clear terminal error.
326
+
327
+ While a label is still `running`, a plain assistant reply is diagnostic only.
328
+ Successful final delivery requires the agent-side `done` signal and its
329
+ structured completion payload. If an older watcher records an error and the
330
+ worker later sends a valid `done`, the later completion is authoritative and the
331
+ stale error is cleared from the label.
332
+
319
333
  ### Progress check-ins from subagent sessions
320
334
 
321
335
  Subagent sessions run without PATH access to the `openclaw` CLI, so
@@ -40,6 +40,7 @@ import {
40
40
  hasCompletionSignal,
41
41
  taskRequiresGitSha,
42
42
  } from './completion.mjs';
43
+ import { getDispatchLivenessPolicy } from './liveness.mjs';
43
44
  import { onStarted, onFinished, onStuck } from './hooks.mjs';
44
45
  import { resolveMessageInput } from './message-input.mjs';
45
46
  import { buildDispatchDeliverySurface } from '../scripts/dispatch-cli-utils.mjs';
@@ -211,9 +212,9 @@ function setLabelDone(name, data) {
211
212
  ...current[name],
212
213
  ...data,
213
214
  status: 'done',
215
+ error: null,
214
216
  updatedAt: new Date().toISOString(),
215
217
  };
216
- delete current[name].error;
217
218
  });
218
219
  return labels[name];
219
220
  }
@@ -1311,16 +1312,17 @@ function cmdStatus(flags) {
1311
1312
  //
1312
1313
  // PING_STALE_MS: 3x the 60s ping interval -- if we haven't heard from the
1313
1314
  // watcher in 3 min, it's probably dead; fall through to check.
1314
- // hardCeilingMs: job timeout * 1.5 -- absolute max regardless of ping age.
1315
- // Catches zombie watchers (watcher alive but session is stuck).
1316
- // idleThresholdMs: max(job timeout, 10 min) -- replaces the old hardcoded 10-min
1317
- // threshold so longer jobs aren't killed at exactly 10 min.
1318
- const PING_STALE_MS = 3 * 60 * 1000;
1319
- const idleThresholdMs = Math.max((entry.timeoutSeconds || 600) * 1000, 10 * 60 * 1000);
1320
- // hardCeilingMs must be >= idleThresholdMs to avoid the ceiling undercutting the
1321
- // idle floor (e.g. timeoutSeconds=300 -> ceiling=7.5 min < idle=10 min would force
1322
- // zombie-guard threshold for sessions that should still use idleThresholdMs).
1323
- const hardCeilingMs = Math.max((entry.timeoutSeconds || 600) * 1000 * 1.5, idleThresholdMs * 1.5);
1315
+ // hardCeilingMs: timeout/reasoning-aware hard ceiling. High-thinking
1316
+ // work gets a larger quiet window before hard failure.
1317
+ // idleThresholdMs: timeout/reasoning-aware quiet threshold. Ambiguous or
1318
+ // missing liveness stays running until these thresholds.
1319
+ const livenessPolicy = getDispatchLivenessPolicy(entry, {
1320
+ startupGraceMs: STARTUP_GRACE_MS,
1321
+ defaultTimeoutSeconds: 600,
1322
+ });
1323
+ const PING_STALE_MS = livenessPolicy.pingStaleMs;
1324
+ const idleThresholdMs = livenessPolicy.idleFailureMs;
1325
+ const hardCeilingMs = livenessPolicy.hardCeilingMs;
1324
1326
 
1325
1327
  let check;
1326
1328
  if (ageMs < STARTUP_GRACE_MS) {
@@ -1333,13 +1335,13 @@ function cmdStatus(flags) {
1333
1335
  check = { shouldResolve: false };
1334
1336
  } else {
1335
1337
  // Ping stale OR past hard ceiling: fall through to session store check
1336
- const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
1338
+ const thresh = ageMs >= hardCeilingMs ? livenessPolicy.hardTimeoutIdleMs : idleThresholdMs;
1337
1339
  check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
1338
1340
  }
1339
1341
  } else {
1340
1342
  // No lastPing -- backward compat (sessions dispatched before heartbeat feature).
1341
1343
  // Use idleThresholdMs (job-aware) instead of the old hardcoded 10 min.
1342
- const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
1344
+ const thresh = ageMs >= hardCeilingMs ? livenessPolicy.hardTimeoutIdleMs : idleThresholdMs;
1343
1345
  check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
1344
1346
  }
1345
1347
 
@@ -1616,10 +1618,13 @@ function cmdSync(flags) {
1616
1618
  // -- Heartbeat-based liveness guard (mirrors cmdStatus logic) ---------
1617
1619
  // Skip auto-resolve when the watcher's lastPing heartbeat is fresh.
1618
1620
  // See cmdStatus for full commentary on PING_STALE_MS / hardCeilingMs.
1619
- const PING_STALE_MS_SYNC = 3 * 60 * 1000;
1620
- const idleThresholdMsSync = Math.max((entry.timeoutSeconds || 600) * 1000, 10 * 60 * 1000);
1621
- // hardCeilingMsSync must be >= idleThresholdMsSync (mirrors cmdStatus fix).
1622
- const hardCeilingMsSync = Math.max((entry.timeoutSeconds || 600) * 1000 * 1.5, idleThresholdMsSync * 1.5);
1621
+ const syncPolicy = getDispatchLivenessPolicy(entry, {
1622
+ startupGraceMs: STARTUP_GRACE_MS_SYNC,
1623
+ defaultTimeoutSeconds: 600,
1624
+ });
1625
+ const PING_STALE_MS_SYNC = syncPolicy.pingStaleMs;
1626
+ const idleThresholdMsSync = syncPolicy.idleFailureMs;
1627
+ const hardCeilingMsSync = syncPolicy.hardCeilingMs;
1623
1628
 
1624
1629
  if (entry.lastPing) {
1625
1630
  const pingAgeMs = Date.now() - new Date(entry.lastPing).getTime();
@@ -1629,7 +1634,7 @@ function cmdSync(flags) {
1629
1634
  }
1630
1635
  }
1631
1636
 
1632
- const syncThresh = elapsedMs >= hardCeilingMsSync ? 2 * 60 * 1000 : idleThresholdMsSync;
1637
+ const syncThresh = elapsedMs >= hardCeilingMsSync ? syncPolicy.hardTimeoutIdleMs : idleThresholdMsSync;
1633
1638
  const check = checkSessionDone(entry.sessionKey, syncStore, syncThresh, true, spawnedAtMs);
1634
1639
 
1635
1640
  if (check.shouldResolve) {
@@ -0,0 +1,61 @@
1
/** Number of milliseconds in one minute; every policy window below is a multiple of it. */
const MINUTE_MS = 60 * 1000;

/**
 * Coerce a value to a finite, strictly positive number.
 *
 * @param {*} value - Anything `Number()` can coerce (number, numeric string, Date, ...).
 * @returns {number|null} The coerced number, or `null` when the result is
 *   NaN, infinite, zero, or negative.
 */
function numberOrNull(value) {
  const n = Number(value);
  return Number.isFinite(n) && n > 0 ? n : null;
}

// Accepted spellings (after trim + lowercase) mapped to their canonical level.
const THINKING_LEVEL_ALIASES = new Map([
  ['xhigh', 'xhigh'],
  ['extra-high', 'xhigh'],
  ['extra_high', 'xhigh'],
  ['high', 'high'],
  ['low', 'low'],
  ['off', 'off'],
  ['none', 'off'],
]);

/**
 * Normalize a free-form thinking level to one of the canonical names.
 *
 * @param {*} value - Raw thinking level; non-strings are treated as unset.
 * @returns {'xhigh'|'high'|'low'|'off'|null} Canonical level, or `null` when
 *   the value is not a string or is not one of the recognized spellings.
 */
export function normalizeThinkingLevel(value) {
  if (typeof value !== 'string') return null;
  return THINKING_LEVEL_ALIASES.get(value.trim().toLowerCase()) ?? null;
}

/**
 * Resolve the task timeout, in seconds, for a dispatch label entry.
 *
 * Precedence: `entry.timeoutSeconds`, then `entry.timeout`, then
 * `fallbackSeconds`, then 300. A candidate is skipped unless it coerces to a
 * finite positive number.
 *
 * @param {object|null} [entry={}] - Label entry; `null` is treated like `{}`.
 * @param {*} [fallbackSeconds=300] - Timeout used when the entry has none.
 * @returns {number} Timeout in seconds (always finite and > 0).
 */
export function getDispatchTimeoutSeconds(entry = {}, fallbackSeconds = 300) {
  // Default parameters only cover `undefined`; guard explicit `null` as well.
  const source = entry ?? {};
  return numberOrNull(source.timeoutSeconds)
    ?? numberOrNull(source.timeout)
    ?? numberOrNull(fallbackSeconds)
    ?? 300;
}

/**
 * Compute the liveness windows used to decide when a dispatched session is
 * probed, auto-resolved, or treated as past its hard ceiling.
 *
 * High/xhigh thinking entries get longer windows than other entries.
 *
 * @param {object|null} [entry={}] - Label entry. Reads `timeoutSeconds`,
 *   `timeout`, `thinking` and `spawnedAt`. `null` is treated like `{}`.
 * @param {object|null} [opts={}] - Overrides. `null` is treated like `{}`.
 * @param {number} [opts.now] - Current time in epoch ms; values that are not
 *   finite and positive fall back to `Date.now()`.
 * @param {number} [opts.defaultTimeoutSeconds] - Timeout used when the entry
 *   does not carry a usable one.
 * @param {number} [opts.startupGraceMs] - Overrides the startup grace
 *   (default 10 min for high thinking, otherwise 5 min).
 * @param {number} [opts.pingStaleMs] - Overrides the ping staleness window
 *   (default 3 min).
 * @returns {{
 *   thinking: string|null, isHighThinking: boolean,
 *   timeoutSeconds: number, timeoutMs: number,
 *   startupGraceMs: number, pingStaleMs: number,
 *   idleProbeMs: number, idleFailureMs: number,
 *   hardCeilingMs: number, hardTimeoutIdleMs: number,
 *   spawnedAtMs: number, ageMs: number, pastHardCeiling: boolean
 * }} The computed policy. `spawnedAtMs` is 0 and `ageMs` is `Infinity` when
 *   `spawnedAt` is missing or unparseable, which makes `pastHardCeiling` true.
 */
export function getDispatchLivenessPolicy(entry = {}, opts = {}) {
  // Default parameters only cover `undefined`; guard explicit `null` as well
  // so a null label entry or options bag cannot throw on property access.
  const source = entry ?? {};
  const options = opts ?? {};

  const now = numberOrNull(options.now) ?? Date.now();
  const timeoutSeconds = getDispatchTimeoutSeconds(source, options.defaultTimeoutSeconds);
  const timeoutMs = timeoutSeconds * 1000;
  const thinking = normalizeThinkingLevel(source.thinking);
  const isHighThinking = thinking === 'high' || thinking === 'xhigh';

  const startupGraceMs = numberOrNull(options.startupGraceMs)
    ?? (isHighThinking ? 10 * MINUTE_MS : 5 * MINUTE_MS);
  const pingStaleMs = numberOrNull(options.pingStaleMs) ?? 3 * MINUTE_MS;

  // First idle result probe: a quarter of the timeout, clamped between a
  // floor and a cap that both depend on the thinking level.
  const idleProbeFloorMs = isHighThinking ? 10 * MINUTE_MS : 1 * MINUTE_MS;
  const idleProbeCapMs = isHighThinking ? 15 * MINUTE_MS : 5 * MINUTE_MS;
  const idleProbeMs = Math.max(idleProbeFloorMs, Math.min(timeoutMs * 0.25, idleProbeCapMs));

  // Idle auto-resolution never fires before the task timeout or the floor.
  const idleFailureFloorMs = isHighThinking ? 20 * MINUTE_MS : 10 * MINUTE_MS;
  const idleFailureMs = Math.max(timeoutMs, idleFailureFloorMs);

  // The hard ceiling is always at least 1.5x the idle window, so it can
  // never undercut the idle threshold.
  const hardCeilingFactor = isHighThinking ? 2 : 1.5;
  const hardCeilingMs = Math.max(timeoutMs * 1.5, idleFailureMs * hardCeilingFactor);
  const hardTimeoutIdleMs = isHighThinking ? 5 * MINUTE_MS : 2 * MINUTE_MS;

  // An unparseable timestamp yields NaN from getTime(); report it the same
  // way as a missing one (0) instead of leaking NaN to callers.
  const parsedSpawnedAt = source.spawnedAt ? new Date(source.spawnedAt).getTime() : 0;
  const spawnedAtMs = Number.isFinite(parsedSpawnedAt) ? parsedSpawnedAt : 0;
  const ageMs = spawnedAtMs ? now - spawnedAtMs : Infinity;

  return {
    thinking,
    isHighThinking,
    timeoutSeconds,
    timeoutMs,
    startupGraceMs,
    pingStaleMs,
    idleProbeMs,
    idleFailureMs,
    hardCeilingMs,
    hardTimeoutIdleMs,
    spawnedAtMs,
    ageMs,
    pastHardCeiling: ageMs >= hardCeilingMs,
  };
}
@@ -36,6 +36,7 @@ import {
36
36
  hasCompletionSignal,
37
37
  resolveCompletionDelivery,
38
38
  } from './completion.mjs';
39
+ import { getDispatchLivenessPolicy } from './liveness.mjs';
39
40
  import { sendMessage } from '../messages.js';
40
41
 
41
42
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -988,8 +989,14 @@ const pollS = parseInt(flags['poll-interval'] || '20', 10);
988
989
  const once = flags.once === true || flags.once === 'true';
989
990
  exitZeroOnTerminal = once;
990
991
 
991
- // How long a session must be idle before we proactively check result
992
- const IDLE_RESULT_CHECK_MS = 60000;
992
/**
 * Build the liveness policy for the label this watcher is monitoring.
 *
 * The label store is re-read on every call. When it has no truthy entry for
 * `label`, a stub entry carrying only `timeoutS` is used instead; `timeoutS`
 * is also passed as the policy's default timeout.
 *
 * @returns {object} The result of `getDispatchLivenessPolicy` for the entry.
 */
function getCurrentLivenessPolicy() {
  const storedEntry = loadLabels()[label];
  const policyOptions = { defaultTimeoutSeconds: timeoutS };
  if (storedEntry) {
    return getDispatchLivenessPolicy(storedEntry, policyOptions);
  }
  return getDispatchLivenessPolicy({ timeoutSeconds: timeoutS }, policyOptions);
}
996
+
997
/**
 * Report whether a `dispatch result` payload carries a completion signal.
 *
 * @param {object|null|undefined} result - Result payload; may be nullish.
 * @returns {*} Whatever `hasCompletionSignal` returns for `result.completion`
 *   (`undefined` is passed when `result` is nullish).
 */
function hasStructuredCompletion(result) {
  const completion = result?.completion;
  return hasCompletionSignal(completion);
}
993
1000
 
994
1001
  if (!label) {
995
1002
  process.stderr.write('[watcher] --label is required\n');
@@ -1104,18 +1111,22 @@ function runOnceAndExit() {
1104
1111
  const terminalJsonlReply = sessionId ? getSessionTerminalReply(sessionId, sessionAgent) : null;
1105
1112
  if (sessionId && terminalJsonlReply && isSessionCleanlyFinished(sessionId, sessionAgent)) {
1106
1113
  const result = dispatch('result', ['--label', label]);
1107
- deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
1114
+ if (hasStructuredCompletion(result)) {
1115
+ deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
1116
+ }
1117
+ process.stderr.write(`[watcher] stop_reason=end_turn observed without completion signal -- continuing to monitor\n`);
1108
1118
  }
1109
1119
  }
1110
1120
 
1111
1121
  const ageMs = status.liveness?.ageMs;
1112
- if (ageMs != null && ageMs >= IDLE_RESULT_CHECK_MS) {
1122
+ const idleResultCheckMs = getCurrentLivenessPolicy().idleProbeMs;
1123
+ if (ageMs != null && ageMs >= idleResultCheckMs) {
1113
1124
  const result = dispatch('result', ['--label', label]);
1114
- if (result?.lastReply || hasCompletionSignal(result?.completion)) {
1125
+ if (hasStructuredCompletion(result)) {
1115
1126
  deliverResult(label, result?.lastReply || null, null, result?.completion || null);
1116
1127
  }
1117
1128
 
1118
- const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
1129
+ const stallReason = getRunningSessionStallReason(status, idleResultCheckMs);
1119
1130
  if (stallReason) {
1120
1131
  process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
1121
1132
  markLabelError(label, stallReason);
@@ -1477,8 +1488,11 @@ while (Date.now() < deadline) {
1477
1488
  if (_sid2a && terminalJsonlReply && isSessionCleanlyFinished(_sid2a, _adir2a)) {
1478
1489
  process.stderr.write(`[watcher] stop_reason=end_turn detected -- delivering early\n`);
1479
1490
  const result = dispatch('result', ['--label', label]);
1480
- deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
1481
- // deliverResult exits
1491
+ if (hasStructuredCompletion(result)) {
1492
+ deliverResult(label, result?.lastReply || terminalJsonlReply, 'completed (stop_reason=end_turn)', result?.completion || null);
1493
+ // deliverResult exits
1494
+ }
1495
+ process.stderr.write(`[watcher] stop_reason=end_turn observed without completion signal -- continuing to monitor\n`);
1482
1496
  }
1483
1497
  }
1484
1498
 
@@ -1489,13 +1503,14 @@ while (Date.now() < deadline) {
1489
1503
  // while this watcher's lastPing heartbeat is fresh (written every 60s);
1490
1504
  // this path handles normal completion before the ping goes stale.
1491
1505
  const ageMs = status.liveness?.ageMs;
1492
- if (ageMs != null && ageMs >= IDLE_RESULT_CHECK_MS) {
1506
+ const idleResultCheckMs = getCurrentLivenessPolicy().idleProbeMs;
1507
+ if (ageMs != null && ageMs >= idleResultCheckMs) {
1493
1508
  const result = dispatch('result', ['--label', label]);
1494
- if (result?.lastReply || hasCompletionSignal(result?.completion)) {
1509
+ if (hasStructuredCompletion(result)) {
1495
1510
  deliverResult(label, result?.lastReply || null, null, result?.completion || null);
1496
1511
  }
1497
1512
 
1498
- const stallReason = getRunningSessionStallReason(status, IDLE_RESULT_CHECK_MS);
1513
+ const stallReason = getRunningSessionStallReason(status, idleResultCheckMs);
1499
1514
  if (stallReason) {
1500
1515
  process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
1501
1516
  markLabelError(label, stallReason);
@@ -1577,7 +1592,7 @@ if (sessionInternalId) {
1577
1592
  // If the session already completed (gateway pruned it -> null tokens), exit cleanly.
1578
1593
  if (statusAtDeadline?.status === 'done' || baselineTokens === null) {
1579
1594
  const r = dispatch('result', ['--label', label]);
1580
- if (r?.lastReply || hasCompletionSignal(r?.completion)) {
1595
+ if (hasStructuredCompletion(r)) {
1581
1596
  // deliverResult calls process.exit(0) internally
1582
1597
  deliverResult(label, r?.lastReply || null, statusAtDeadline?.summary || null, r?.completion || null);
1583
1598
  }
@@ -1616,7 +1631,7 @@ while (Date.now() - flatSince < FLAT_WINDOW_MS) {
1616
1631
  deliverResult(label, r?.lastReply || null, st.summary, r?.completion || st?.completion || null);
1617
1632
  }
1618
1633
  const r2 = dispatch('result', ['--label', label]);
1619
- if (r2?.lastReply || hasCompletionSignal(r2?.completion)) {
1634
+ if (hasStructuredCompletion(r2)) {
1620
1635
  // deliverResult calls process.exit(0) internally
1621
1636
  deliverResult(label, r2?.lastReply || null, null, r2?.completion || null);
1622
1637
  }
@@ -1710,7 +1725,7 @@ if (sessionInternalId) {
1710
1725
  deliverResult(label, rExt?.lastReply || null, stExt.summary, rExt?.completion || stExt?.completion || null);
1711
1726
  }
1712
1727
  const rExt2 = dispatch('result', ['--label', label]);
1713
- if (rExt2?.lastReply || hasCompletionSignal(rExt2?.completion)) {
1728
+ if (hasStructuredCompletion(rExt2)) {
1714
1729
  // deliverResult calls process.exit(0) internally
1715
1730
  deliverResult(label, rExt2?.lastReply || null, null, rExt2?.completion || null);
1716
1731
  }
@@ -1767,7 +1782,7 @@ for (const round of steerRounds) {
1767
1782
  deliverResult(label, r3?.lastReply || null, st2.summary, r3?.completion || st2?.completion || null);
1768
1783
  }
1769
1784
  const r3 = dispatch('result', ['--label', label]);
1770
- if (r3?.lastReply || hasCompletionSignal(r3?.completion)) {
1785
+ if (hasStructuredCompletion(r3)) {
1771
1786
  // deliverResult calls process.exit(0) internally
1772
1787
  deliverResult(label, r3?.lastReply || null, null, r3?.completion || null);
1773
1788
  }
@@ -1782,7 +1797,7 @@ for (const round of steerRounds) {
1782
1797
  if (st3?.status === 'done') {
1783
1798
  // Check if a result was captured before marking as error
1784
1799
  const r4 = dispatch('result', ['--label', label]);
1785
- if (r4?.lastReply || hasCompletionSignal(r4?.completion)) {
1800
+ if (hasStructuredCompletion(r4)) {
1786
1801
  deliverResult(label, r4?.lastReply || null, st3.summary, r4?.completion || st3?.completion || null); // deliverResult calls process.exit(0)
1787
1802
  }
1788
1803
  markLabelError(label, 'timed out -- killed after steer attempts (no result captured)');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openclaw-scheduler",
3
- "version": "0.2.6",
3
+ "version": "0.2.7",
4
4
  "description": "SQLite-backed job scheduler and workflow engine for OpenClaw agents",
5
5
  "type": "module",
6
6
  "main": "./index.js",
@@ -42,6 +42,7 @@
42
42
  "dispatch/deliver-watcher.sh",
43
43
  "dispatch/hooks.mjs",
44
44
  "dispatch/index.mjs",
45
+ "dispatch/liveness.mjs",
45
46
  "dispatch/message-input.mjs",
46
47
  "dispatch/README.md",
47
48
  "dispatch/watcher.mjs",