openclaw-scheduler 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,8 +32,17 @@ import { randomUUID } from 'crypto';
32
32
  import { execFileSync } from 'child_process';
33
33
  import { homedir } from 'os';
34
34
  import Database from 'better-sqlite3';
35
- import { buildTerminalCompletionPayload } from './completion.mjs';
35
+ import {
36
+ buildCompletionSignalInstructions,
37
+ buildTerminalCompletionPayload,
38
+ extractLastMeaningfulAssistantReplyFromEntries,
39
+ extractTerminalAssistantReplyFromEntries,
40
+ hasCompletionSignal,
41
+ taskRequiresGitSha,
42
+ } from './completion.mjs';
36
43
  import { onStarted, onFinished, onStuck } from './hooks.mjs';
44
+ import { resolveMessageInput } from './message-input.mjs';
45
+ import { buildDispatchDeliverySurface } from '../scripts/dispatch-cli-utils.mjs';
37
46
 
38
47
  const __dirname = dirname(fileURLToPath(import.meta.url));
39
48
  const HOME_DIR = process.env.HOME || homedir();
@@ -110,6 +119,15 @@ function sleep(ms) {
110
119
  return new Promise(r => setTimeout(r, ms));
111
120
  }
112
121
 
122
/**
 * Normalize a timestamp-like value to epoch milliseconds.
 *
 * Numbers below 1e12 are treated as epoch seconds and scaled to milliseconds;
 * larger numbers are returned unchanged. Every other value is handed to the
 * `Date` constructor (ISO strings, Date instances, ...).
 *
 * @param {number|string|Date|null|undefined} value - Raw timestamp.
 * @returns {number|null} Epoch milliseconds, or null when the value is missing
 *   or cannot be interpreted as a finite point in time.
 */
function toTimestampMs(value) {
  if (value == null) return null;
  if (typeof value === 'number') {
    // NaN / ±Infinity are not timestamps. Callers compare the result against
    // null, so a non-finite number must not leak through as a "valid" value.
    if (!Number.isFinite(value)) return null;
    return value < 1e12 ? value * 1000 : value;
  }
  const parsed = new Date(value).getTime();
  return Number.isFinite(parsed) ? parsed : null;
}
130
+
113
131
  /** Parse --flag value pairs from argv (supports both --flag value and --flag=value) */
114
132
  function parseFlags(argv) {
115
133
  const flags = {};
@@ -131,21 +149,6 @@ function parseFlags(argv) {
131
149
  return flags;
132
150
  }
133
151
 
134
- function taskRequiresGitSha(taskPrompt) {
135
- if (!taskPrompt || typeof taskPrompt !== 'string') return false;
136
-
137
- const commandPattern = /\bgit\s+(push|rebase|cherry-pick)\b|(?:^|\s)--force-with-lease\b|(?:^|\s)--force-push\b/ig;
138
- let match;
139
- while ((match = commandPattern.exec(taskPrompt)) !== null) {
140
- const before = taskPrompt.slice(Math.max(0, match.index - 40), match.index);
141
- const negatedContext = /\b(?:do\s+not|don't|dont|never)\s+(?:use|run|call|invoke)?\s*$/i.test(before)
142
- || /\bavoid\s+(?:using\s+)?$/i.test(before)
143
- || /\bwithout\s+(?:using\s+)?$/i.test(before);
144
- if (!negatedContext) return true;
145
- }
146
- return false;
147
- }
148
-
149
152
  // -- Labels Ledger --------------------------------------------
150
153
 
151
154
  function getLabelsSignature() {
@@ -202,6 +205,19 @@ function setLabel(name, data) {
202
205
  return labels[name];
203
206
  }
204
207
 
208
/**
 * Mark a ledger label as done.
 *
 * Merges `data` over the existing entry, forces `status: 'done'`, refreshes
 * `updatedAt`, and strips any `error` field (whether it came from the previous
 * entry or from `data`).
 *
 * @param {string} name - Label name in the ledger.
 * @param {object} data - Fields to merge into the entry.
 * @returns {object} The updated ledger entry for `name`.
 */
function setLabelDone(name, data) {
  const markDone = (ledger) => {
    // Build the merged entry first, then drop `error` via rest destructuring.
    const { error: _staleError, ...entry } = {
      ...ledger[name],
      ...data,
      status: 'done',
      updatedAt: new Date().toISOString(),
    };
    ledger[name] = entry;
  };
  return mutateLabels(markDone)[name];
}
220
+
205
221
  // -- Gateway Calls --------------------------------------------
206
222
 
207
223
  /**
@@ -247,23 +263,16 @@ function gatewayCall(method, params = {}, opts = {}) {
247
263
  // -- Gateway Error Log Check ----------------------------------
248
264
 
249
265
  /**
250
- * Check the gateway error log for 529/FailoverError/overload errors
266
+ * Check the gateway error log for the most recent diagnostic lane task error
251
267
  * matching a specific session key.
252
268
  *
253
269
  * Scans the last N bytes of gateway.err.log for diagnostic lane task errors
254
- * that reference the session key and match overload patterns.
270
+ * that reference the session key and returns the newest error line.
255
271
  *
256
272
  * @param {string} sessionKey - The session key to check
257
273
  * @returns {{ found: boolean, error: string|null, timestamp: string|null }}
258
274
  */
259
- function check529InGatewayLog(sessionKey) {
260
- const OVERLOAD_PATTERNS = [
261
- /529/i,
262
- /failover\s*error/i,
263
- /overload/i,
264
- /temporarily\s+overloaded/i,
265
- ];
266
-
275
+ function getGatewayLaneTaskError(sessionKey) {
267
276
  try {
268
277
  const logPath = join(HOME_DIR, '.openclaw', 'logs', 'gateway.err.log');
269
278
  if (!existsSync(logPath)) return { found: false, error: null, timestamp: null };
@@ -285,20 +294,15 @@ function check529InGatewayLog(sessionKey) {
285
294
  if (!line.includes(sessionKey)) continue;
286
295
  if (!line.includes('lane task error')) continue;
287
296
 
288
- // Extract the error message
289
297
  const errorMatch = line.match(/error="([^"]+)"/);
290
298
  if (!errorMatch) continue;
291
299
 
292
- const errorMsg = errorMatch[1];
293
- if (OVERLOAD_PATTERNS.some(p => p.test(errorMsg))) {
294
- // Extract timestamp
295
- const tsMatch = line.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)/);
296
- return {
297
- found: true,
298
- error: `FailoverError (529): ${errorMsg}`,
299
- timestamp: tsMatch ? tsMatch[1] : null,
300
- };
301
- }
300
+ const tsMatch = line.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)/);
301
+ return {
302
+ found: true,
303
+ error: errorMatch[1],
304
+ timestamp: tsMatch ? tsMatch[1] : null,
305
+ };
302
306
  }
303
307
 
304
308
  return { found: false, error: null, timestamp: null };
@@ -307,6 +311,32 @@ function check529InGatewayLog(sessionKey) {
307
311
  }
308
312
  }
309
313
 
314
/**
 * Report whether the newest gateway lane task error recorded for a session
 * looks like an overload (529 / FailoverError / "overloaded") failure.
 *
 * Only the error returned by getGatewayLaneTaskError is inspected; when that
 * error does not match an overload pattern the result is "not found".
 *
 * @param {string} sessionKey - The session key to check
 * @returns {{ found: boolean, error: string|null, timestamp: string|null }}
 */
function check529InGatewayLog(sessionKey) {
  const notFound = { found: false, error: null, timestamp: null };

  const { found, error, timestamp } = getGatewayLaneTaskError(sessionKey);
  if (!found || !error) return notFound;

  const overloadSignatures = [
    /529/i,
    /failover\s*error/i,
    /overload/i,
    /temporarily\s+overloaded/i,
  ];
  for (const signature of overloadSignatures) {
    if (signature.test(error)) {
      return {
        found: true,
        error: `FailoverError (529): ${error}`,
        timestamp,
      };
    }
  }
  return notFound;
}
339
+
310
340
  // -- Sessions Store (Direct Read) -----------------------------
311
341
 
312
342
  /**
@@ -328,6 +358,103 @@ function readSessionsStore(agent = 'main') {
328
358
  }
329
359
  }
330
360
 
361
/**
 * Build the path of a session's JSONL transcript under
 * `<HOME_DIR>/.openclaw/agents/<agent>/sessions/`.
 *
 * @param {string} [agent='main'] - Agent ID owning the session.
 * @param {string} sessionId - Session ID; a falsy value yields null.
 * @returns {string|null} Transcript path, or null when no session ID is given.
 */
function getSessionJsonlPath(agent = 'main', sessionId) {
  if (sessionId) {
    const fileName = `${sessionId}.jsonl`;
    return join(HOME_DIR, '.openclaw', 'agents', agent, 'sessions', fileName);
  }
  return null;
}
365
+
366
/**
 * Gather the evidence that a session has started and/or produced activity.
 *
 * "Started" means the store entry carries a sessionId or any parseable
 * started/updated timestamp. "Activity" means a JSONL transcript exists, the
 * entry reports a positive token count, or chat.history returned messages.
 *
 * @param {string} sessionKey - Session key to look up.
 * @param {object|null} sessionsStore - Parsed sessions store keyed by session key.
 * @returns {{
 *   found: boolean, hasStartedSignal: boolean, hasActivitySignal: boolean,
 *   messageCount: number|null, jsonlExists: boolean, hasTokens: boolean,
 *   updatedAtMs: number|null, sessionStartedAtMs: number|null,
 *   sessionId: string|null
 * }}
 */
function inspectSessionActivitySignal(sessionKey, sessionsStore) {
  const storeEntry = sessionKey ? sessionsStore?.[sessionKey] : null;
  if (!storeEntry) {
    return {
      found: false,
      hasStartedSignal: false,
      hasActivitySignal: false,
      messageCount: null,
      jsonlExists: false,
      hasTokens: false,
      updatedAtMs: null,
      sessionStartedAtMs: null,
      sessionId: null,
    };
  }

  const agentId = agentFromSessionKey(sessionKey) || 'main';
  const transcriptPath = getSessionJsonlPath(agentId, storeEntry.sessionId);
  const jsonlExists = transcriptPath ? existsSync(transcriptPath) : false;
  const hasTokens = typeof storeEntry.totalTokens === 'number' && storeEntry.totalTokens > 0;
  const sessionStartedAtMs = toTimestampMs(storeEntry.sessionStartedAt || storeEntry.startedAt);
  const updatedAtMs = toTimestampMs(storeEntry.updatedAt);

  let messageCount = null;
  try {
    const messages = gatewayCall('chat.history', { sessionKey }, { timeout: 8000 })?.messages;
    if (Array.isArray(messages)) {
      messageCount = messages.length;
    }
  } catch {
    // Best effort: a failed history lookup simply leaves messageCount at null.
  }

  const hasMessages = typeof messageCount === 'number' && messageCount > 0;
  const hasTimestamp = sessionStartedAtMs !== null || updatedAtMs !== null;

  return {
    found: true,
    hasStartedSignal: Boolean(storeEntry.sessionId) || hasTimestamp,
    hasActivitySignal: jsonlExists || hasTokens || hasMessages,
    messageCount,
    jsonlExists,
    hasTokens,
    updatedAtMs,
    sessionStartedAtMs,
    sessionId: storeEntry.sessionId || null,
  };
}
410
+
411
/**
 * Decide whether a session that is known to the sessions store should be
 * resolved as a bootstrap (spawn) failure.
 *
 * The only fail-fast condition is an explicit gateway lane error for the
 * session, and it is only consulted once the startup grace period has elapsed.
 *
 * @param {string} sessionKey - Session key to inspect.
 * @param {object|null} sessionsStore - Parsed sessions store keyed by session key.
 * @param {number} spawnedAtMs - Spawn time in epoch ms; a falsy value is
 *   treated as "infinitely old".
 * @param {number} startupGraceMs - Grace period in ms during which nothing is resolved.
 * @returns {{ shouldResolve: boolean, reason: string|null, errorMsg: string|null }}
 */
function inspectSessionBootstrapFailure(sessionKey, sessionsStore, spawnedAtMs, startupGraceMs) {
  const keepWaiting = { shouldResolve: false, reason: null, errorMsg: null };

  const isKnownSession = Boolean(sessionKey) && Boolean(sessionsStore?.[sessionKey]);
  if (!isKnownSession) return keepWaiting;

  const elapsedMs = spawnedAtMs ? Date.now() - spawnedAtMs : Infinity;
  if (elapsedMs < startupGraceMs) return keepWaiting;

  const { found, error } = getGatewayLaneTaskError(sessionKey);
  if (!found || !error) {
    // A Codex session can enter the sessions store before chat.history, JSONL,
    // or token counters are written. Treat that as "still booting"; the watcher
    // and job timeout own later failure handling. Only an explicit lane error
    // recorded by the gateway fails fast.
    return keepWaiting;
  }

  return {
    shouldResolve: true,
    reason: `diagnostic lane error: ${error}`,
    errorMsg: `spawn-failure: ${error}`,
  };
}
436
+
437
/**
 * Read the last `maxLines` non-blank lines of a session's JSONL transcript and
 * return the ones that parse to a truthy JSON value.
 *
 * @param {string} sessionId - Session ID; a falsy value yields null.
 * @param {string} [agent='main'] - Agent ID owning the session.
 * @param {number} [maxLines=200] - Maximum number of trailing lines to parse.
 *   Zero or a negative value yields an empty array.
 * @returns {object[]|null} Parsed entries in file order, or null when the
 *   session ID is missing or the transcript cannot be read.
 */
function readJsonlTailEntries(sessionId, agent = 'main', maxLines = 200) {
  if (!sessionId) return null;
  try {
    const jsonlPath = join(HOME_DIR, '.openclaw', 'agents', agent, 'sessions', `${sessionId}.jsonl`);
    const lines = readFileSync(jsonlPath, 'utf-8')
      .split('\n')
      .filter(line => line.trim());

    // slice(-0) is slice(0) and would return every line, and a negative limit
    // would drop the head of the file instead of taking a tail. Treat any
    // non-positive limit as "no lines".
    const tail = maxLines > 0 ? lines.slice(-maxLines) : [];

    const entries = [];
    for (const line of tail) {
      try {
        const parsed = JSON.parse(line);
        if (parsed) entries.push(parsed);
      } catch {
        // Malformed or partially written lines are skipped.
      }
    }
    return entries;
  } catch {
    // Missing or unreadable transcript.
    return null;
  }
}
457
+
331
458
  /**
332
459
  * Auto-detect the originating channel from the most recently active main session.
333
460
  * Reads sessions.json, finds sessions active within the last 10 minutes,
@@ -348,6 +475,17 @@ function inferChatType(key, session) {
348
475
  return "";
349
476
  }
350
477
 
478
/**
 * Split an origin string of the form "<channel>:<target>" at its first colon.
 *
 * @param {string|null|undefined} origin - Origin such as "telegram:12345".
 * @returns {{ channel: string|null, target: string|null }} Both fields are
 *   null when the origin is empty or does not match the expected form.
 */
function parseOriginTarget(origin) {
  const parsed = /^(?<channel>[^:]+):(?<target>.+)$/.exec(origin || '');
  if (parsed === null) {
    return { channel: null, target: null };
  }
  const { channel, target } = parsed.groups;
  return { channel, target };
}
483
+
484
/**
 * Derive an origin string ("<channel>:<target>") from a delivery target.
 *
 * @param {string|null|undefined} deliverTo - Delivery target (e.g. a chat ID).
 * @param {string|null} [deliverChannel='telegram'] - Delivery channel; any
 *   falsy value falls back to 'telegram'.
 * @returns {string|null} The origin string, or null when there is no target.
 */
function originFromDeliveryTarget(deliverTo, deliverChannel = 'telegram') {
  if (deliverTo) {
    const channel = deliverChannel ? deliverChannel : 'telegram';
    return `${channel}:${deliverTo}`;
  }
  return null;
}
488
+
351
489
  function getActiveOriginFromSessions() {
352
490
  const store = readSessionsStore("main");
353
491
  if (!store) return null;
@@ -551,6 +689,70 @@ function disarmWatchdog(label) {
551
689
  }
552
690
  }
553
691
 
692
+
693
/**
 * Escape a value for embedding inside a single-quoted shell string.
 *
 * Each single quote is replaced with `'"'"'` (close the quoted string, emit a
 * double-quoted single quote, reopen). The caller supplies the outer quotes.
 *
 * @param {*} value - Value to escape; coerced with String().
 * @returns {string} The escaped text.
 */
function quoteForSingleQuotedShell(value) {
  const escapedQuote = `'"'"'`;
  return String(value).split("'").join(escapedQuote);
}
696
+
697
/**
 * Schedule a quick-poll delivery watcher shell job for a dispatch label.
 * Used both for the initial watcher registration and SIGTERM handoffs.
 *
 * Builds a shell command that runs watcher.mjs once (`--once`) for the label
 * and registers it through the scheduler CLI (`cli.js --json jobs add <spec>`)
 * as a cron job whose first run is due immediately.
 *
 * @param {object} options
 * @param {string} options.label - Dispatch label to watch (required).
 * @param {string} options.deliverTo - Delivery target (required).
 * @param {string} [options.deliverChannel='telegram'] - Delivery channel.
 * @param {number|string} [options.timeoutSeconds=300] - Session timeout in
 *   seconds; the watcher gets this plus a 120s buffer. A value that is not a
 *   finite number falls back to 300.
 * @param {number|string} [options.idleThresholdSeconds=300] - Idle threshold
 *   passed to the watcher; zero or a non-numeric value falls back to 300.
 * @param {string} [options.origin='system'] - Origin recorded on the job.
 * @param {string} [options.agentBrand=BRAND] - Prefix of the job name.
 * @param {string} [options.nameSuffix=''] - Suffix appended to the job name.
 * @returns {object|null} The `job` field of the CLI's JSON output, or null
 *   when the output has none.
 * @throws {Error} When `label` or `deliverTo` is missing, when the CLI call
 *   fails or times out, or when its output is not valid JSON.
 */
function scheduleDeliveryWatcherJob({
  label,
  deliverTo,
  deliverChannel = 'telegram',
  timeoutSeconds = 300,
  idleThresholdSeconds = 300,
  origin = 'system',
  agentBrand = BRAND,
  nameSuffix = '',
}) {
  if (!label) throw new Error('label is required');
  if (!deliverTo) throw new Error('deliverTo is required');

  const schedulerCli = join(__dirname, '..', 'cli.js');
  const watcherPath = join(__dirname, 'watcher.mjs');

  // Both numbers are interpolated unquoted into the shell command below, so
  // they must always be real numbers. A non-numeric timeout used to produce
  // `--timeout NaN`; fall back to the default the same way the idle threshold does.
  const sessionTimeoutS = Number(timeoutSeconds);
  const watcherTimeoutS = (Number.isFinite(sessionTimeoutS) ? sessionTimeoutS : 300) + 120;
  const idleThresholdS = Number(idleThresholdSeconds) || 300;

  const sq = quoteForSingleQuotedShell;
  const watcherCmd =
    `DISPATCH_LABELS_PATH='${sq(LABELS_PATH)}' ` +
    `DISPATCH_INDEX_PATH='${sq(join(__dirname, 'index.mjs'))}' ` +
    `'${sq(process.execPath)}' '${sq(watcherPath)}' ` +
    `--label '${sq(label)}' --timeout ${watcherTimeoutS} ` +
    `--poll-interval 20 --idle-threshold ${idleThresholdS} --once`;

  // "YYYY-MM-DD HH:MM:SS" derived from the current ISO timestamp.
  const nowUtc = new Date().toISOString().replace('T', ' ').slice(0, 19);
  const jobSpec = {
    name: `${agentBrand}-deliver:${label}${nameSuffix}`,
    schedule_kind: 'cron',
    schedule_cron: config.deliver_watcher_cron || '* * * * *',
    next_run_at: nowUtc,
    session_target: 'shell',
    payload_kind: 'shellCommand',
    payload_message: watcherCmd,
    delivery_mode: 'announce-always',
    delivery_channel: deliverChannel,
    delivery_to: deliverTo,
    delivery_guarantee: 'at-least-once',
    ttl_hours: config.deliver_watcher_ttl_hours ?? 48,
    overlap_policy: 'skip',
    run_timeout_ms: 120_000,
    delete_after_run: 1,
    origin: origin || 'system',
  };

  const raw = execFileSync(process.execPath, [schedulerCli, '--json', 'jobs', 'add', JSON.stringify(jobSpec)], {
    encoding: 'utf-8',
    timeout: 10000,
    stdio: ['pipe', 'pipe', 'pipe'],
  });

  const parsed = JSON.parse(raw.trim());
  return parsed?.job || null;
}
755
+
554
756
  // -- Session Helpers ------------------------------------------
555
757
 
556
758
  /** Build a unique session key for a new subagent session. */
@@ -565,12 +767,19 @@ function makeSessionKey(agentId) {
565
767
  *
566
768
  * Flags:
567
769
  * --label <string> Required. Human-readable name
568
- * --message <string> Required. Prompt sent to the agent
770
+ * --message <string> Prompt sent to the agent
771
+ * --message-file <path> Read prompt text from a file (`-` = stdin)
772
+ * --message-env <VAR> Read prompt text from an environment variable
773
+ * --message-stdin Read prompt text from stdin explicitly
774
+ * (stdin is also auto-read when piped and no other message source is set)
569
775
  * --agent <string> Agent ID (default: main)
570
776
  * --thinking <string> Reasoning level: low|high|xhigh (default: not set)
571
777
  * --timeout <seconds> Run timeout in seconds (default: 300)
572
- * --origin <origin> Required. Where the job was dispatched from (e.g. "telegram:<your-user-id>", "system")
573
- * --deliver-to <target> Delivery target (e.g. Telegram chat ID). Enables deliver:true on the gateway call.
778
+ * --origin <origin> Explicit dispatch origin for audit/retries (e.g. "telegram:<chat_id>", "system")
779
+ * If omitted but --deliver-to is explicit, dispatch derives origin from that target.
780
+ * Active-session auto-detect is preserved only as a manual/local fallback when both are absent.
781
+ * --deliver-to <target> Delivery target (e.g. Telegram chat ID). Registers the scheduler watcher for durable final delivery.
782
+ * Chat-triggered callers should pass inbound metadata chat_id here, especially for group chats.
574
783
  * Defaults to origin chat ID when --origin is a "telegram:<id>" string.
575
784
  * --deliver-channel <ch> Delivery channel for --deliver-to (default: telegram)
576
785
  * --delivery-mode <mode> announce|announce-always|none (default: announce)
@@ -581,18 +790,23 @@ function makeSessionKey(agentId) {
581
790
  * --model <string> Model override (e.g. anthropic/claude-sonnet-4-6)
582
791
  */
583
792
  async function cmdEnqueue(flags) {
584
- const label = flags.label;
585
- let message = flags.message;
793
+ const label = flags.label;
586
794
  if (!label) die('--label is required', 2);
587
- // Support --message-file for multiline prompts without shell escaping issues
588
- if (!message && flags['message-file']) {
589
- try {
590
- message = readFileSync(flags['message-file'], 'utf-8').trim();
591
- } catch (err) {
592
- die(`--message-file: could not read file: ${err.message}`, 2);
593
- }
795
+
796
+ let message = null;
797
+ try {
798
+ message = await resolveMessageInput({
799
+ message: flags.message,
800
+ messageFile: flags['message-file'],
801
+ messageEnv: flags['message-env'],
802
+ messageStdin: flags['message-stdin'],
803
+ });
804
+ } catch (err) {
805
+ die(err.message, 2);
806
+ }
807
+ if (message === null || message.length === 0) {
808
+ die('--message, --message-file, --message-env, --message-stdin, or piped stdin is required', 2);
594
809
  }
595
- if (!message) die('--message or --message-file is required', 2);
596
810
 
597
811
  const agent = flags.agent || 'main';
598
812
  const thinking = flags.thinking || null;
@@ -605,30 +819,44 @@ async function cmdEnqueue(flags) {
605
819
  process.stderr.write(`[${BRAND}] WARNING: --timeout not specified, defaulting to 300s. ` +
606
820
  `Pass --timeout explicitly (≥1200 for thinking=high tasks) to avoid premature watcher kills.\n`);
607
821
  }
608
- let origin = flags.origin || null;
822
+ const explicitOrigin = flags.origin || null;
823
+ const explicitDeliverTo = flags['deliver-to'] || null;
824
+ const explicitDeliverChannel = flags['deliver-channel'] || null;
825
+ let origin = explicitOrigin;
826
+
827
+ // Contract: chat-triggered callers should pass --deliver-to from inbound
828
+ // metadata chat_id. If they omit --origin, derive it from that explicit
829
+ // delivery target so dispatch never falls back to whichever session happened
830
+ // to be active most recently.
831
+ if (!origin && explicitDeliverTo) {
832
+ origin = originFromDeliveryTarget(explicitDeliverTo, explicitDeliverChannel || 'telegram');
833
+ }
609
834
 
610
- // Auto-detect origin from active sessions if not explicitly provided
611
- if (!origin) {
835
+ // Preserve active-session inference only as a manual/local fallback when the
836
+ // caller truly omitted both origin and delivery target.
837
+ if (!origin && !explicitDeliverTo) {
612
838
  origin = getActiveOriginFromSessions();
613
839
  if (origin) {
614
840
  process.stderr.write(`[${BRAND}] auto-detected origin from active session: ${origin}\n`);
841
+ process.stderr.write(`[${BRAND}] NOTE: active-session origin detection is a manual/local fallback. ` +
842
+ `Chat-triggered callers should pass --deliver-to from inbound metadata chat_id.\n`);
615
843
  }
616
844
  }
617
845
 
618
846
  // -- Auto-derive deliver-to from origin ---------------------------------
619
847
  // If origin is "telegram:<id>", use <id> as the default deliver-to target.
620
848
  let defaultDeliverTo = null;
621
- let defaultDeliverCh = 'telegram';
849
+ let defaultDeliverCh = explicitDeliverChannel || 'telegram';
622
850
  if (origin) {
623
- const originMatch = /^([^:]+):(.+)$/.exec(origin);
624
- if (originMatch) {
625
- defaultDeliverCh = originMatch[1];
626
- defaultDeliverTo = originMatch[2];
851
+ const { channel, target } = parseOriginTarget(origin);
852
+ if (channel && target) {
853
+ if (!explicitDeliverChannel) defaultDeliverCh = channel;
854
+ defaultDeliverTo = target;
627
855
  }
628
856
  }
629
857
 
630
- const deliverTo = flags['deliver-to'] || defaultDeliverTo;
631
- const deliverChannel = flags['deliver-channel'] || defaultDeliverCh || 'telegram';
858
+ const deliverTo = explicitDeliverTo || defaultDeliverTo;
859
+ const deliverChannel = explicitDeliverChannel || defaultDeliverCh || 'telegram';
632
860
  const deliverMode = flags['delivery-mode'] || 'announce';
633
861
  const mode = flags.mode || 'fresh';
634
862
 
@@ -645,6 +873,9 @@ async function cmdEnqueue(flags) {
645
873
 
646
874
  // -- Watchdog monitoring flags -----------------------------
647
875
  const noMonitorRaw = flags['no-monitor'];
876
+ const noMonitorReason = typeof noMonitorRaw === 'string' && noMonitorRaw.trim()
877
+ ? noMonitorRaw.trim()
878
+ : null;
648
879
  const noMonitor = !!noMonitorRaw;
649
880
  const monitorEnabled = !noMonitor && flags.monitor !== 'false';
650
881
  const monitorInterval = flags['monitor-interval'] || config.watchdogIntervalCron || '*/15 * * * *';
@@ -659,6 +890,7 @@ async function cmdEnqueue(flags) {
659
890
  "REJECTED: --deliver-to is required for dispatch jobs.\n" +
660
891
  "Pass --deliver-to <chat_id> (e.g. --deliver-to -100200000000 for a group, " +
661
892
  "or --deliver-to 123456789 for a DM).\n" +
893
+ "Chat-triggered callers should pass inbound metadata chat_id here, especially for group chats.\n" +
662
894
  "Alternatively, pass --origin telegram:<chat_id> to auto-derive the delivery target.\n" +
663
895
  "Pass --no-monitor \"<reason>\" only if you explicitly want to skip delivery (audit trail required).",
664
896
  2
@@ -769,26 +1001,11 @@ async function cmdEnqueue(flags) {
769
1001
  const doneScriptPath = join(__dirname, 'index.mjs');
770
1002
  parts.push(``);
771
1003
  parts.push(`---`);
772
- parts.push(`COMPLETION SIGNAL -- READ CAREFULLY:`);
773
- parts.push(``);
774
- parts.push(`Only call this command after ALL of the following are true:`);
775
- parts.push(` 1. All file edits are saved`);
776
- parts.push(` 2. All commits are pushed (git push completed successfully)`);
777
- parts.push(` 3. All API calls (e.g. GitHub comment replies) are done`);
778
- parts.push(` 4. You have verified the work is complete`);
779
- parts.push(``);
780
- parts.push(`Call this as your ABSOLUTE FINAL action -- nothing else runs after this:`);
781
- parts.push(` node '${doneScriptPath}' done --label '${label.replace(/'/g, "'\\''")}' \\`);
782
- parts.push(` --summary "<what you actually did>" \\`);
783
- parts.push(` --checklist '{"work_complete":true,"tests_passed":true,"pushed":true}' \\`);
784
- parts.push(` [--sha "<git commit SHA if applicable>"]`);
785
- parts.push(``);
786
- parts.push(`Checklist rules:`);
787
- parts.push(` - work_complete MUST be true -- you are asserting you have finished ALL assigned work`);
788
- parts.push(` - If tests failed or push failed, do NOT set tests_passed:true or pushed:true -- instead continue working`);
789
- parts.push(` - Only include tests_passed/pushed if they apply to your task`);
790
- parts.push(`If your task involved git commits, --sha is required and must be the actual SHA of your pushed commit. The done script will reject invented or placeholder SHAs.`);
791
- parts.push(`Do NOT call done while planning, reading files, or mid-task. If you have not yet pushed a commit, you are not done.`);
1004
+ parts.push(buildCompletionSignalInstructions({
1005
+ label,
1006
+ taskPrompt: message,
1007
+ doneScriptPath,
1008
+ }));
792
1009
  parts.push(`---`);
793
1010
  parts.push(``);
794
1011
  parts.push(`---`);
@@ -802,15 +1019,16 @@ async function cmdEnqueue(flags) {
802
1019
  const taskMessage = parts.join('\n');
803
1020
 
804
1021
  // -- Call gateway agent method -------------------------------
805
- // Gateway deliver is used as a fast-path secondary. The scheduler watcher
806
- // (created below) is the primary delivery path with retry + audit trail.
807
- // Both may fire -- at-least-once semantics, duplicates acceptable.
1022
+ // Final user delivery belongs to the scheduler watcher below.
1023
+ // Keep the gateway spawn fire-and-forget so raw tool output or internal
1024
+ // done payloads cannot leak directly to the chat ahead of the durable
1025
+ // post-office delivery path.
808
1026
  try {
809
1027
  const response = gatewayCall('agent', {
810
1028
  message: taskMessage,
811
1029
  sessionKey,
812
1030
  idempotencyKey: idem,
813
- deliver: !!deliverTo,
1031
+ deliver: false,
814
1032
  lane: 'subagent',
815
1033
  timeout: timeoutS,
816
1034
  label: label,
@@ -822,6 +1040,11 @@ async function cmdEnqueue(flags) {
822
1040
  } : {}),
823
1041
  }, { timeout: 15000 });
824
1042
 
1043
+ const deliveryDisabled = !deliverTo && noMonitor;
1044
+ const deliveryDisabledReason = deliveryDisabled
1045
+ ? (noMonitorReason || 'explicit opt-out via --no-monitor')
1046
+ : null;
1047
+
825
1048
  // Update ledger
826
1049
  setLabel(label, {
827
1050
  sessionKey,
@@ -834,9 +1057,12 @@ async function cmdEnqueue(flags) {
834
1057
  deliverTo: deliverTo || null,
835
1058
  deliverChannel: deliverChannel || null,
836
1059
  deliveryMode: deliverMode || null,
1060
+ deliveryDisabled,
1061
+ deliveryDisabledReason,
837
1062
  verifyCmd: verifyCmd || null,
838
1063
  spawnedAt: new Date().toISOString(),
839
1064
  timeoutSeconds: timeoutS,
1065
+ idleThresholdSeconds: parseInt(flags['idle-threshold'] || '300', 10),
840
1066
  // Fix 4: Store timeout so cmdDone threshold logic can use it correctly.
841
1067
  timeout: timeoutS,
842
1068
  status: 'running',
@@ -879,51 +1105,29 @@ async function cmdEnqueue(flags) {
879
1105
  }
880
1106
 
881
1107
  // -- Register scheduler watcher for delivery ---------------
882
- // Creates a one-shot shell job that runs watcher.mjs (blocks until session
883
- // completes, outputs result). The scheduler's handleDelivery delivers with
884
- // retry, alias resolution, and audit trail in scheduler.db.
885
- // Gateway deliver:true is kept as a fast-path secondary (see deliver flag above).
1108
+ // Creates a quick-poll shell job that runs watcher.mjs once per tick. Empty
1109
+ // stdout means "still running" and advances the next tick without delivery.
1110
+ // Terminal stdout goes through the scheduler's handleDelivery with retry,
1111
+ // alias resolution, and audit trail in scheduler.db.
1112
+ // The watcher is the only final-delivery path for dispatched jobs.
886
1113
  const sq = s => String(s).replace(/'/g, "'\\''");
887
1114
  let schedulerWatcherOk = false;
888
1115
  if (deliverTo && deliverMode !== 'none') {
889
1116
  try {
890
- const watcherPath = join(__dirname, 'watcher.mjs');
891
- // Watcher timeout = session timeout + 120s buffer for startup/polling
892
- const watcherTimeoutS = timeoutS + 120;
893
- const idleThresholdS = flags['idle-threshold'] || '300';
894
- const watcherCmd = `DISPATCH_LABELS_PATH='${sq(LABELS_PATH)}' '${sq(process.execPath)}' '${sq(watcherPath)}' --label '${sq(label)}' --timeout ${watcherTimeoutS} --poll-interval 20 --idle-threshold ${idleThresholdS}`;
895
-
896
- const nowUtc = new Date().toISOString().replace('T', ' ').slice(0, 19);
897
- const jobSpec = JSON.stringify({
898
- name: `${agentBrand}-deliver:${label}`,
899
- schedule_kind: 'at',
900
- schedule_at: nowUtc,
901
- session_target: 'shell',
902
- payload_kind: 'shellCommand',
903
- payload_message: watcherCmd,
904
- delivery_mode: 'announce-always',
905
- delivery_channel: deliverChannel,
906
- delivery_to: deliverTo,
907
- delivery_guarantee: 'at-least-once',
908
- ttl_hours: config.deliver_watcher_ttl_hours ?? 48, // configurable TTL (deliver_watcher_ttl_hours); default 48h
909
- overlap_policy: 'skip',
910
- // Shell ceiling = max(initial timeout, rolling extension cap) + headroom.
911
- // The watcher can extend its deadline up to MAX_DEADLINE_EXTENSION (4h) on
912
- // activity (token growth / JSONL mtime). Headroom covers 2*FLAT_WINDOW + slop.
913
- // Watcher constants: FLAT_WINDOW_MS=180s, MAX_DEADLINE_EXTENSION=4h.
914
- run_timeout_ms: Math.max(watcherTimeoutS, 4 * 3600) * 1000
915
- + 420 * 1000, // +7min headroom (2*FLAT_WINDOW + 1min slop)
916
- delete_after_run: 1, // auto-delete after watcher completes
917
- origin: origin || 'system',
918
- });
919
- const schedulerCli = join(__dirname, '..', 'cli.js');
920
- execFileSync(process.execPath, [schedulerCli, 'jobs', 'add', jobSpec], {
921
- encoding: 'utf-8',
922
- timeout: 10000,
923
- stdio: ['pipe', 'pipe', 'pipe'],
1117
+ const watcherJob = scheduleDeliveryWatcherJob({
1118
+ label,
1119
+ deliverTo,
1120
+ deliverChannel,
1121
+ timeoutSeconds: timeoutS,
1122
+ idleThresholdSeconds: flags['idle-threshold'] || '300',
1123
+ origin: origin || 'system',
1124
+ agentBrand,
924
1125
  });
925
1126
  schedulerWatcherOk = true;
926
- process.stderr.write(`[${agentBrand}] scheduler watcher registered: ${agentBrand}-deliver:${label}\n`);
1127
+ process.stderr.write(
1128
+ `[${agentBrand}] scheduler watcher registered: ${agentBrand}-deliver:${label}` +
1129
+ `${watcherJob?.id ? ` (${watcherJob.id})` : ''}\n`
1130
+ );
927
1131
  } catch (err) {
928
1132
  process.stderr.write(`[${agentBrand}] scheduler watcher FAILED (gateway fallback active): ${err.message}\n`);
929
1133
  }
@@ -934,7 +1138,7 @@ async function cmdEnqueue(flags) {
934
1138
  let watchdogJobId = null;
935
1139
  if (monitorEnabled && deliverTo) {
936
1140
  try {
937
- const checkCmd = `'${sq(process.execPath)}' '${sq(join(__dirname, 'index.mjs'))}' stuck --label '${sq(label)}' --threshold-min ${monitorTimeout}`;
1141
+ const checkCmd = `'${sq(process.execPath)}' '${sq(join(__dirname, 'index.mjs'))}' result --label '${sq(label)}'`;
938
1142
  const alertChannel = deliverChannel || 'telegram';
939
1143
  const alertTarget = deliverTo;
940
1144
  const watchdogSpec = JSON.stringify({
@@ -979,6 +1183,18 @@ async function cmdEnqueue(flags) {
979
1183
  }
980
1184
  }
981
1185
 
1186
+ const delivery = buildDispatchDeliverySurface({
1187
+ deliverTo,
1188
+ deliverChannel,
1189
+ deliveryMode: deliverMode,
1190
+ deliveryDisabled,
1191
+ deliveryDisabledReason,
1192
+ ...(deliverTo ? {
1193
+ scheduler: schedulerWatcherOk,
1194
+ gateway: true,
1195
+ } : {}),
1196
+ });
1197
+
982
1198
  out({
983
1199
  ok: true,
984
1200
  label,
@@ -987,12 +1203,7 @@ async function cmdEnqueue(flags) {
987
1203
  mode: isFresh ? 'fresh' : 'reuse',
988
1204
  agent,
989
1205
  status: 'accepted',
990
- delivery: deliverTo ? {
991
- scheduler: schedulerWatcherOk,
992
- gateway: !!deliverTo,
993
- target: deliverTo,
994
- channel: deliverChannel,
995
- } : null,
1206
+ delivery,
996
1207
  watchdog: monitorEnabled ? {
997
1208
  enabled: watchdogJobOk,
998
1209
  jobId: watchdogJobId,
@@ -1000,35 +1211,46 @@ async function cmdEnqueue(flags) {
1000
1211
  timeout: monitorTimeout,
1001
1212
  ...(monitorEnabled && !deliverTo ? { skipped: true, reason: 'no --deliver-to target' } : {}),
1002
1213
  } : null,
1003
- message: schedulerWatcherOk
1004
- ? 'Session spawned. Delivery via scheduler (primary) + gateway (secondary).'
1005
- : deliverTo
1006
- ? 'Session spawned. Delivery via gateway only (scheduler watcher failed).'
1007
- : 'Session spawned via gateway. Agent is running.',
1214
+ message: delivery.status === 'disabled'
1215
+ ? `Session spawned. Delivery intentionally disabled${delivery.reason ? ` (${delivery.reason}).` : '.'}`
1216
+ : schedulerWatcherOk
1217
+ ? 'Session spawned. Delivery via scheduler (primary) + gateway (secondary).'
1218
+ : deliverTo
1219
+ ? 'Session spawned. Delivery via gateway only (scheduler watcher failed).'
1220
+ : 'Session spawned. Delivery target missing or not recorded.',
1008
1221
  });
1009
1222
 
1010
1223
  // -- Post-spawn verification (Fix 3) --------------------------------
1011
1224
  // Canary: poll sessions.json up to 3 times at 10s intervals to confirm the
1012
- // session appeared in the store. Non-fatal -- output is already written above.
1013
- // If the session never shows up, stderr gets a loud warning and ledger status
1014
- // is set to 'spawn-warning'. The watcher provides the definitive error path.
1225
+ // session appeared in the store. A session store entry with sessionId or
1226
+ // startedAt/sessionStartedAt is enough: long first turns may not flush JSONL,
1227
+ // token counts, or chat.history until the model call completes. The delivery
1228
+ // watcher owns later completion/failure handling.
1015
1229
  const SPAWN_POLL_MAX = 3;
1016
1230
  const SPAWN_POLL_DELAY_MS = 10_000;
1017
1231
  let spawnConfirmed = false;
1018
1232
  for (let spawnPoll = 0; spawnPoll < SPAWN_POLL_MAX; spawnPoll++) {
1019
1233
  await sleep(SPAWN_POLL_DELAY_MS);
1020
1234
  const spawnStore = readSessionsStore(agent);
1021
- if (spawnStore && sessionKey in spawnStore) {
1235
+ const signal = inspectSessionActivitySignal(sessionKey, spawnStore);
1236
+ if (signal.hasStartedSignal || signal.hasActivitySignal) {
1022
1237
  spawnConfirmed = true;
1023
1238
  break;
1024
1239
  }
1025
1240
  }
1026
1241
  if (!spawnConfirmed) {
1027
- process.stderr.write(
1028
- `[${agentBrand}] WARNING: session ${sessionKey} did not appear in gateway after ` +
1029
- `${(SPAWN_POLL_MAX * SPAWN_POLL_DELAY_MS) / 1000}s -- spawn may have failed\n`
1030
- );
1031
- setLabel(label, { status: 'spawn-warning' });
1242
+ const laneError = getGatewayLaneTaskError(sessionKey);
1243
+ const spawnError = laneError.found && laneError.error
1244
+ ? `spawn-failure: ${laneError.error}`
1245
+ : `spawn-failure: session ${sessionKey} never produced transcript/history within ` +
1246
+ `${(SPAWN_POLL_MAX * SPAWN_POLL_DELAY_MS) / 1000}s`;
1247
+ process.stderr.write(`[${agentBrand}] WARNING: ${spawnError}\n`);
1248
+ setLabel(label, {
1249
+ status: 'error',
1250
+ error: spawnError,
1251
+ summary: spawnError,
1252
+ });
1253
+ disarmWatchdog(label);
1032
1254
  }
1033
1255
  } catch (err) {
1034
1256
  die(`gateway agent call failed: ${err.message}`);
@@ -1065,62 +1287,80 @@ function cmdStatus(flags) {
1065
1287
  const ageMs = Date.now() - spawnedAtMs;
1066
1288
  const STARTUP_GRACE_MS = config.startupGraceMs ?? 300_000;
1067
1289
 
1068
- // -- Heartbeat-based liveness guard ----------------------------------
1069
- // The watcher process writes lastPing every 60s while the session is live.
1070
- // If the ping is fresh, the watcher is alive and working -- defer auto-resolve
1071
- // to avoid killing sessions during slow tool calls, docker builds, etc.
1072
- //
1073
- // PING_STALE_MS: 3x the 60s ping interval -- if we haven't heard from the
1074
- // watcher in 3 min, it's probably dead; fall through to check.
1075
- // hardCeilingMs: job timeout * 1.5 -- absolute max regardless of ping age.
1076
- // Catches zombie watchers (watcher alive but session is stuck).
1077
- // idleThresholdMs: max(job timeout, 10 min) -- replaces the old hardcoded 10-min
1078
- // threshold so longer jobs aren't killed at exactly 10 min.
1079
- const PING_STALE_MS = 3 * 60 * 1000;
1080
- const idleThresholdMs = Math.max((entry.timeoutSeconds || 600) * 1000, 10 * 60 * 1000);
1081
- // hardCeilingMs must be >= idleThresholdMs to avoid the ceiling undercutting the
1082
- // idle floor (e.g. timeoutSeconds=300 -> ceiling=7.5 min < idle=10 min would force
1083
- // zombie-guard threshold for sessions that should still use idleThresholdMs).
1084
- const hardCeilingMs = Math.max((entry.timeoutSeconds || 600) * 1000 * 1.5, idleThresholdMs * 1.5);
1085
-
1086
- let check;
1087
- if (ageMs < STARTUP_GRACE_MS) {
1088
- // Within startup grace -- never auto-resolve
1089
- check = { shouldResolve: false };
1090
- } else if (entry.lastPing) {
1091
- const pingAgeMs = Date.now() - new Date(entry.lastPing).getTime();
1092
- if (pingAgeMs < PING_STALE_MS && ageMs < hardCeilingMs) {
1093
- // Watcher alive and within job ceiling -- defer auto-resolve
1290
+ const bootstrapFailure = !entry.lastPing
1291
+ ? inspectSessionBootstrapFailure(
1292
+ entry.sessionKey,
1293
+ sessionsStore,
1294
+ spawnedAtMs,
1295
+ STARTUP_GRACE_MS,
1296
+ )
1297
+ : { shouldResolve: false, reason: null, errorMsg: null };
1298
+ if (bootstrapFailure.shouldResolve) {
1299
+ setLabel(label, {
1300
+ status: 'error',
1301
+ error: bootstrapFailure.errorMsg,
1302
+ summary: `Auto-resolved as spawn failure: ${bootstrapFailure.reason}`,
1303
+ });
1304
+ syncAction = `auto-resolved as spawn failure: ${bootstrapFailure.reason}`;
1305
+ disarmWatchdog(label);
1306
+ } else {
1307
+ // -- Heartbeat-based liveness guard ----------------------------------
1308
+ // The watcher process writes lastPing every 60s while the session is live.
1309
+ // If the ping is fresh, the watcher is alive and working -- defer auto-resolve
1310
+ // to avoid killing sessions during slow tool calls, docker builds, etc.
1311
+ //
1312
+ // PING_STALE_MS: 3x the 60s ping interval -- if we haven't heard from the
1313
+ // watcher in 3 min, it's probably dead; fall through to check.
1314
+ // hardCeilingMs: job timeout * 1.5 -- absolute max regardless of ping age.
1315
+ // Catches zombie watchers (watcher alive but session is stuck).
1316
+ // idleThresholdMs: max(job timeout, 10 min) -- replaces the old hardcoded 10-min
1317
+ // threshold so longer jobs aren't killed at exactly 10 min.
1318
+ const PING_STALE_MS = 3 * 60 * 1000;
1319
+ const idleThresholdMs = Math.max((entry.timeoutSeconds || 600) * 1000, 10 * 60 * 1000);
1320
+ // hardCeilingMs must be >= idleThresholdMs to avoid the ceiling undercutting the
1321
+ // idle floor (e.g. timeoutSeconds=300 -> ceiling=7.5 min < idle=10 min would force
1322
+ // zombie-guard threshold for sessions that should still use idleThresholdMs).
1323
+ const hardCeilingMs = Math.max((entry.timeoutSeconds || 600) * 1000 * 1.5, idleThresholdMs * 1.5);
1324
+
1325
+ let check;
1326
+ if (ageMs < STARTUP_GRACE_MS) {
1327
+ // Within startup grace -- never auto-resolve
1094
1328
  check = { shouldResolve: false };
1329
+ } else if (entry.lastPing) {
1330
+ const pingAgeMs = Date.now() - new Date(entry.lastPing).getTime();
1331
+ if (pingAgeMs < PING_STALE_MS && ageMs < hardCeilingMs) {
1332
+ // Watcher alive and within job ceiling -- defer auto-resolve
1333
+ check = { shouldResolve: false };
1334
+ } else {
1335
+ // Ping stale OR past hard ceiling: fall through to session store check
1336
+ const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
1337
+ check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
1338
+ }
1095
1339
  } else {
1096
- // Ping stale OR past hard ceiling: fall through to session store check
1340
+ // No lastPing -- backward compat (sessions dispatched before heartbeat feature).
1341
+ // Use idleThresholdMs (job-aware) instead of the old hardcoded 10 min.
1097
1342
  const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
1098
1343
  check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
1099
1344
  }
1100
- } else {
1101
- // No lastPing -- backward compat (sessions dispatched before heartbeat feature).
1102
- // Use idleThresholdMs (job-aware) instead of the old hardcoded 10 min.
1103
- const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
1104
- check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
1105
- }
1106
1345
 
1107
- if (check.shouldResolve) {
1108
- if (check.is529) {
1109
- setLabel(label, {
1110
- status: 'error',
1111
- error: check.errorMsg || `529/overload: ${check.reason}`,
1112
- summary: `Auto-resolved as error: ${check.reason}`,
1113
- });
1114
- syncAction = `auto-resolved as 529 error: ${check.reason}`;
1115
- } else {
1116
- setLabel(label, {
1117
- status: 'interrupted',
1118
- summary: `Auto-resolved: session went idle without calling done. Work may be incomplete. (${check.reason})`,
1119
- });
1120
- syncAction = `auto-resolved as interrupted: ${check.reason}`;
1346
+ if (check.shouldResolve) {
1347
+ if (check.is529) {
1348
+ setLabel(label, {
1349
+ status: 'error',
1350
+ error: check.errorMsg || `529/overload: ${check.reason}`,
1351
+ summary: `Auto-resolved as error: ${check.reason}`,
1352
+ });
1353
+ syncAction = `auto-resolved as 529 error: ${check.reason}`;
1354
+ } else {
1355
+ setLabel(label, {
1356
+ status: 'interrupted',
1357
+ summary: `Auto-resolved: session went idle without calling done. Work may be incomplete. (${check.reason})`,
1358
+ });
1359
+ syncAction = `auto-resolved as interrupted: ${check.reason}`;
1360
+ }
1361
+ // Disarm watchdog when session is auto-resolved
1362
+ disarmWatchdog(label);
1121
1363
  }
1122
- // Disarm watchdog when session is auto-resolved
1123
- disarmWatchdog(label);
1124
1364
  }
1125
1365
  }
1126
1366
 
@@ -1128,6 +1368,9 @@ function cmdStatus(flags) {
1128
1368
  if (entry.sessionKey && sessionsStore) {
1129
1369
  const sessionEntry = sessionsStore[entry.sessionKey];
1130
1370
  if (sessionEntry) {
1371
+ if (sessionEntry.sessionId && entry.sessionId !== sessionEntry.sessionId) {
1372
+ setLabel(label, { sessionId: sessionEntry.sessionId });
1373
+ }
1131
1374
  liveness = {
1132
1375
  updatedAt: sessionEntry.updatedAt,
1133
1376
  ageMs: sessionEntry.updatedAt
@@ -1159,6 +1402,7 @@ function cmdStatus(flags) {
1159
1402
  updatedAt: current.updatedAt,
1160
1403
  summary: current.summary || null,
1161
1404
  completion: current.completion || null,
1405
+ delivery: buildDispatchDeliverySurface(current),
1162
1406
  error: current.error || null,
1163
1407
  liveness,
1164
1408
  ...(syncAction ? { syncAction } : {}),
@@ -1192,7 +1436,7 @@ function hasActiveWatcher(label) {
1192
1436
  r.status = 'running'
1193
1437
  OR (r.status = 'pending' AND r.started_at > datetime('now','-5 minutes'))
1194
1438
  )
1195
- `).get(`%-deliver:${label}`);
1439
+ `).get(`%-deliver:${label}%`);
1196
1440
  return (row?.c || 0) > 0;
1197
1441
  } catch {
1198
1442
  return false;
@@ -1346,6 +1590,28 @@ function cmdSync(flags) {
1346
1590
  const syncStore = getSyncStore(entry);
1347
1591
  const spawnedAtMs = entry.spawnedAt ? new Date(entry.spawnedAt).getTime() : 0;
1348
1592
  const elapsedMs = Date.now() - spawnedAtMs;
1593
+ const STARTUP_GRACE_MS_SYNC = config.startupGraceMs ?? 300_000;
1594
+
1595
+ const bootstrapFailure = !entry.lastPing
1596
+ ? inspectSessionBootstrapFailure(
1597
+ entry.sessionKey,
1598
+ syncStore,
1599
+ spawnedAtMs,
1600
+ STARTUP_GRACE_MS_SYNC,
1601
+ )
1602
+ : { shouldResolve: false, reason: null, errorMsg: null };
1603
+ if (bootstrapFailure.shouldResolve) {
1604
+ changes.push({ label: name, from: 'running', to: 'error', reason: bootstrapFailure.reason });
1605
+ if (!dryRun) {
1606
+ setLabel(name, {
1607
+ status: 'error',
1608
+ error: bootstrapFailure.errorMsg,
1609
+ summary: `Synced as spawn failure: ${bootstrapFailure.reason}`,
1610
+ });
1611
+ disarmWatchdog(name);
1612
+ }
1613
+ continue;
1614
+ }
1349
1615
 
1350
1616
  // -- Heartbeat-based liveness guard (mirrors cmdStatus logic) ---------
1351
1617
  // Skip auto-resolve when the watcher's lastPing heartbeat is fresh.
@@ -1412,32 +1678,62 @@ function cmdResult(flags) {
1412
1678
  return;
1413
1679
  }
1414
1680
 
1415
- // Try to get the session transcript to find last assistant message
1681
+ // Conservative transcript recovery:
1682
+ // - lastReply is ONLY populated from a terminal JSONL-scoped assistant reply
1683
+ // - diagnosticReply captures the last meaningful assistant text for timeout reporting
1416
1684
  let lastReply = null;
1685
+ let diagnosticReply = null;
1686
+ let recoverySource = null;
1687
+ let recoverySessionId = entry.sessionId || null;
1688
+ const resultAgent = entry.agent || agentFromSessionKey(entry.sessionKey) || 'main';
1689
+ const resultStore = entry.sessionKey ? readSessionsStore(resultAgent) : null;
1690
+ const resultSessionEntry = entry.sessionKey && resultStore ? resultStore[entry.sessionKey] : null;
1691
+
1692
+ if (resultSessionEntry?.sessionId) {
1693
+ recoverySessionId = resultSessionEntry.sessionId;
1694
+ if (entry.sessionId !== recoverySessionId) {
1695
+ setLabel(label, { sessionId: recoverySessionId });
1696
+ }
1697
+ }
1698
+
1699
+ if (recoverySessionId) {
1700
+ const jsonlEntries = readJsonlTailEntries(recoverySessionId, resultAgent, 200);
1701
+ const terminalReply = extractTerminalAssistantReplyFromEntries(jsonlEntries);
1702
+ const jsonlDiagnostic = extractLastMeaningfulAssistantReplyFromEntries(jsonlEntries);
1703
+
1704
+ if (terminalReply) {
1705
+ lastReply = terminalReply;
1706
+ recoverySource = 'jsonl-terminal';
1707
+ }
1708
+ if (jsonlDiagnostic) {
1709
+ diagnosticReply = jsonlDiagnostic;
1710
+ if (!recoverySource) recoverySource = 'jsonl-diagnostic';
1711
+ }
1712
+ }
1713
+
1417
1714
  if (entry.sessionKey) {
1418
1715
  try {
1419
1716
  const result = gatewayCall('chat.history', {
1420
1717
  sessionKey: entry.sessionKey,
1421
1718
  }, { timeout: 10000 });
1422
1719
 
1423
- if (result?.messages?.length) {
1424
- for (let i = result.messages.length - 1; i >= 0; i--) {
1425
- const e = result.messages[i];
1426
- if (e.role === 'assistant' && e.content) {
1427
- lastReply = typeof e.content === 'string'
1428
- ? e.content
1429
- : Array.isArray(e.content)
1430
- ? e.content.map(c => c.text || '').join('')
1431
- : JSON.stringify(e.content);
1432
- break;
1433
- }
1720
+ if (result?.messages?.length && !diagnosticReply) {
1721
+ diagnosticReply = extractLastMeaningfulAssistantReplyFromEntries(result.messages);
1722
+ if (diagnosticReply && !recoverySource) recoverySource = 'history-diagnostic';
1723
+ }
1724
+
1725
+ if (!lastReply && result?.messages?.length) {
1726
+ const historyTerminal = extractTerminalAssistantReplyFromEntries(result.messages);
1727
+ if (historyTerminal) {
1728
+ lastReply = historyTerminal;
1729
+ recoverySource = 'history-terminal';
1434
1730
  }
1435
1731
  }
1436
1732
  } catch {}
1437
1733
  }
1438
1734
 
1439
1735
  // -- Watchdog cleanup: disable watchdog job when result is available --
1440
- if (lastReply && entry.watchdogJobId) {
1736
+ if ((lastReply || hasCompletionSignal(entry.completion)) && entry.watchdogJobId) {
1441
1737
  disarmWatchdog(label);
1442
1738
  }
1443
1739
 
@@ -1449,11 +1745,64 @@ function cmdResult(flags) {
1449
1745
  spawnedAt: entry.spawnedAt,
1450
1746
  summary: entry.summary || (lastReply ? lastReply.slice(0, 500) : null),
1451
1747
  completion: entry.completion || null,
1748
+ delivery: buildDispatchDeliverySurface(entry),
1452
1749
  lastReply: lastReply || null,
1750
+ diagnosticReply: diagnosticReply || lastReply || null,
1751
+ recovery: recoverySource || recoverySessionId ? {
1752
+ source: recoverySource || null,
1753
+ sessionId: recoverySessionId || null,
1754
+ } : null,
1453
1755
  error: entry.error || null,
1454
1756
  });
1455
1757
  }
1456
1758
 
1759
+
1760
/**
 * watcher-handoff -- schedule a follow-up delivery watcher job for a label.
 *
 * Emits exactly one JSON result via out():
 *   - unknown label                      -> { ok: false, scheduled: false, message }
 *   - label status set and not 'running' -> { ok: true, scheduled: false, reason, status }
 *   - no deliverTo / deliveryMode 'none' -> { ok: true, scheduled: false, reason }
 *   - otherwise                          -> { ok: true, scheduled: true, jobId, reason }
 *
 * Flags:
 *   --label <string>   Required. Label whose ledger entry is inspected.
 *   --reason <text>    Optional. Echoed back in the success payload only.
 */
function cmdWatcherHandoff(flags) {
  const { label } = flags;
  const reason = flags.reason || null;
  if (!label) {
    die('--label is required', 2);
  }

  const entry = getLabel(label);
  if (!entry) {
    out({ ok: false, scheduled: false, label, message: 'No session found for this label' });
    return;
  }

  // A missing status is treated the same as 'running'; any other status
  // means there is nothing left to watch.
  const isTerminal = Boolean(entry.status) && entry.status !== 'running';
  if (isTerminal) {
    out({ ok: true, scheduled: false, label, reason: 'label already terminal', status: entry.status });
    return;
  }

  const deliveryDisabled = !entry.deliverTo || entry.deliveryMode === 'none';
  if (deliveryDisabled) {
    out({ ok: true, scheduled: false, label, reason: 'delivery disabled for this label' });
    return;
  }

  // Brand precedence: per-agent configured name, then a non-'main' agent id,
  // then the global config name, then the built-in BRAND.
  const agentName = entry.agent || 'main';
  const configuredBrand = config.agents?.[agentName]?.name;
  const agentFallback = agentName !== 'main' ? agentName : null;
  const agentBrand = configuredBrand || agentFallback || config.name || BRAND;

  const watcherOptions = {
    label,
    deliverTo: entry.deliverTo,
    deliverChannel: entry.deliverChannel || 'telegram',
    // Non-numeric or zero values fall back to 300 seconds.
    timeoutSeconds: Number(entry.timeoutSeconds ?? entry.timeout) || 300,
    idleThresholdSeconds: Number(entry.idleThresholdSeconds) || 300,
    origin: entry.origin || 'system',
    agentBrand,
    // Timestamped suffix -- presumably keeps each handoff job name distinct;
    // confirm against scheduleDeliveryWatcherJob.
    nameSuffix: `:handoff:${Date.now()}`,
  };
  const watcherJob = scheduleDeliveryWatcherJob(watcherOptions);

  out({
    ok: true,
    scheduled: true,
    label,
    jobId: watcherJob?.id || null,
    reason,
  });
}
1805
+
1457
1806
  /**
1458
1807
  * done -- agent-side completion signal (push-based).
1459
1808
  * Called by the subagent itself as its LAST action when fully complete.
@@ -1518,15 +1867,15 @@ async function cmdDone(flags) {
1518
1867
  }
1519
1868
  }
1520
1869
 
1521
- // Summary passes through as-is for raw diagnostics, but we also persist a
1522
- // first-class completion payload with deterministic delivery text so the
1523
- // watcher/post-office path never depends solely on transcript recovery.
1870
+ // Persist a first-class completion payload with deterministic delivery text
1871
+ // so the watcher/post-office path never depends solely on transcript recovery
1872
+ // or on whatever raw blob the model chose to print at the end.
1524
1873
  const completion = buildTerminalCompletionPayload({
1525
1874
  summary: rawSummary,
1526
1875
  checklist,
1527
1876
  sha,
1528
1877
  });
1529
- const summary = completion.summary || rawSummary;
1878
+ const summary = completion.summary || null;
1530
1879
 
1531
1880
  const existing = getLabel(label);
1532
1881
 
@@ -1642,7 +1991,7 @@ async function cmdDone(flags) {
1642
1991
  // Label was never registered (e.g. direct subagent spawn, not via enqueue).
1643
1992
  // This is not an error -- the work completed, the label just wasn't tracked.
1644
1993
  process.stderr.write(`[${BRAND}] warn: no session found for label "${label}" -- registering as done\n`);
1645
- setLabel(label, { status: 'done', summary, completion, ...(sha ? { sha } : {}) });
1994
+ setLabelDone(label, { summary, completion, ...(sha ? { sha } : {}) });
1646
1995
 
1647
1996
  // No watcher is polling for this label, so actively notify via the gateway
1648
1997
  // post office using delivery config from config.json as fallback target.
@@ -1659,6 +2008,7 @@ async function cmdDone(flags) {
1659
2008
  duration_ms: 0,
1660
2009
  session_key: null,
1661
2010
  summary,
2011
+ completion,
1662
2012
  deliverTo,
1663
2013
  deliveryChannel,
1664
2014
  }).catch(() => {});
@@ -1670,8 +2020,7 @@ async function cmdDone(flags) {
1670
2020
  return;
1671
2021
  }
1672
2022
 
1673
- setLabel(label, {
1674
- status: 'done',
2023
+ setLabelDone(label, {
1675
2024
  summary,
1676
2025
  completion,
1677
2026
  ...(sha ? { sha } : {}),
@@ -1690,6 +2039,8 @@ async function cmdDone(flags) {
1690
2039
  status: 'ok',
1691
2040
  duration_ms: Date.now() - spawnedAtMs,
1692
2041
  session_key: existing.sessionKey || null,
2042
+ summary,
2043
+ completion,
1693
2044
  }).catch(() => {});
1694
2045
 
1695
2046
  out({ ok: true, label, status: 'done', summary, completion, message: 'Label marked done via agent signal.' });
@@ -1699,16 +2050,31 @@ async function cmdDone(flags) {
1699
2050
  * send / steer -- send a message into a running session.
1700
2051
  *
1701
2052
  * Flags:
1702
- * --label <string> Required (unless --session-key)
1703
- * --message <string> Required. Message to send
1704
- * --session-key <key> Optional. Direct session key (bypasses label lookup)
2053
+ * --label <string> Required (unless --session-key)
2054
+ * --message <string> Message to send
2055
+ * --message-file <path> Read message text from a file (`-` = stdin)
2056
+ * --message-env <VAR> Read message text from an environment variable
2057
+ * --message-stdin Read message text from stdin explicitly
2058
+ * (stdin is also auto-read when piped and no other message source is set)
2059
+ * --session-key <key> Optional. Direct session key (bypasses label lookup)
1705
2060
  */
1706
2061
  async function cmdSend(flags) {
1707
- const label = flags.label;
1708
- const message = flags.message;
2062
+ const label = flags.label;
1709
2063
  const directKey = flags['session-key'];
2064
+ let message = null;
1710
2065
 
1711
- if (!message) die('--message is required', 2);
2066
+ try {
2067
+ message = await resolveMessageInput({
2068
+ message: flags.message,
2069
+ messageFile: flags['message-file'],
2070
+ messageEnv: flags['message-env'],
2071
+ messageStdin: flags['message-stdin'],
2072
+ });
2073
+ } catch (err) {
2074
+ die(err.message, 2);
2075
+ }
2076
+
2077
+ if (message === null || message.length === 0) die('--message, --message-file, --message-env, --message-stdin, or piped stdin is required', 2);
1712
2078
  if (!label && !directKey) die('--label or --session-key is required', 2);
1713
2079
 
1714
2080
  let sessionKey = directKey;
@@ -1807,6 +2173,7 @@ function cmdList(flags) {
1807
2173
  let entries = Object.entries(labels).map(([name, data]) => ({
1808
2174
  label: name,
1809
2175
  ...data,
2176
+ delivery: buildDispatchDeliverySurface(data),
1810
2177
  }));
1811
2178
 
1812
2179
  if (filterStatus) {
@@ -1833,13 +2200,15 @@ ${BRAND} -- sub-agent dispatch CLI (native gateway API)
1833
2200
  Usage: openclaw-scheduler <subcommand> [flags]
1834
2201
 
1835
2202
  Subcommands:
1836
- enqueue --label <l> --message <m>|--message-file <f> [--agent <a>] [--thinking <t>]
1837
- [--timeout <s>] [--mode fresh|reuse] [--model <m>]
1838
- [--origin <o>] (auto-detected from active session; override with e.g. "telegram:<your-group-id>")
2203
+ enqueue --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
2204
+ [--agent <a>] [--thinking <t>] [--timeout <s>] [--mode fresh|reuse] [--model <m>]
2205
+ [--origin <o>] (recommended explicit value, e.g. "telegram:<chat_id>" or "system")
1839
2206
  [--deliver-to <id>] [--deliver-channel <ch>] [--delivery-mode <m>]
1840
- (--deliver-to defaults to origin chat ID when --origin is "telegram:<id>")
2207
+ (--deliver-to should come from inbound metadata chat_id; explicit --deliver-to becomes origin when --origin is omitted)
2208
+ (active-session auto-detect is preserved only as a manual/local fallback)
1841
2209
  [--no-monitor] [--monitor-interval <cron>] [--monitor-timeout <min>]
1842
2210
  [--verify-cmd <shell_cmd>]
2211
+ (stdin is auto-read when piped and no explicit message source is set)
1843
2212
 
1844
2213
  status --label <l>
1845
2214
 
@@ -1847,9 +2216,13 @@ Subcommands:
1847
2216
 
1848
2217
  result --label <l>
1849
2218
 
1850
- send --label <l> --message <m> [--session-key <k>]
2219
+ watcher-handoff --label <l> [--reason <text>]
2220
+
2221
+ send --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
2222
+ [--session-key <k>]
1851
2223
 
1852
- steer --label <l> --message <m> (alias for send)
2224
+ steer --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
2225
+ (alias for send)
1853
2226
 
1854
2227
  heartbeat --label <l> OR --session-key <k>
1855
2228
 
@@ -1871,6 +2244,7 @@ switch (subcommand) {
1871
2244
  case 'status': cmdStatus(flags); break;
1872
2245
  case 'stuck': await cmdStuck(flags); break;
1873
2246
  case 'result': cmdResult(flags); break;
2247
+ case 'watcher-handoff': cmdWatcherHandoff(flags); break;
1874
2248
  case 'send': await cmdSend(flags); break;
1875
2249
  case 'steer': await cmdSend(flags); break;
1876
2250
  case 'heartbeat': cmdHeartbeat(flags); break;