openclaw-scheduler 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/AGENTS.md +302 -0
  2. package/BEST-PRACTICES.md +506 -0
  3. package/CHANGELOG.md +82 -0
  4. package/CODE_OF_CONDUCT.md +22 -0
  5. package/CONTEXT.md +26 -0
  6. package/CONTRIBUTING.md +73 -0
  7. package/IMPLEMENTATION_SPEC.md +170 -0
  8. package/INSTALL-ADDITIONAL-HOST.md +333 -0
  9. package/INSTALL-LINUX.md +419 -0
  10. package/INSTALL-WINDOWS.md +305 -0
  11. package/INSTALL.md +364 -0
  12. package/JOB-QUICK-REF.md +222 -0
  13. package/LICENSE +21 -0
  14. package/QUICK-START.md +256 -0
  15. package/README.md +2170 -0
  16. package/SECURITY.md +34 -0
  17. package/UNINSTALL.md +129 -0
  18. package/UPGRADING.md +436 -0
  19. package/agents.js +67 -0
  20. package/approval.js +107 -0
  21. package/backup.js +390 -0
  22. package/bin/openclaw-scheduler.js +138 -0
  23. package/cli.js +1083 -0
  24. package/db.js +122 -0
  25. package/dispatch/529-recovery.mjs +204 -0
  26. package/dispatch/README.md +372 -0
  27. package/dispatch/config.example.json +24 -0
  28. package/dispatch/deliver-watcher.sh +57 -0
  29. package/dispatch/hooks.mjs +171 -0
  30. package/dispatch/index.mjs +1836 -0
  31. package/dispatch/watcher.mjs +1396 -0
  32. package/dispatch-queue.js +112 -0
  33. package/dispatcher-approvals.js +96 -0
  34. package/dispatcher-delivery.js +43 -0
  35. package/dispatcher-maintenance.js +242 -0
  36. package/dispatcher-shell.js +29 -0
  37. package/dispatcher-strategies.js +1280 -0
  38. package/dispatcher-utils.js +81 -0
  39. package/dispatcher.js +855 -0
  40. package/docs/adr-schedule-ownership.md +73 -0
  41. package/docs/gateway-contract.md +904 -0
  42. package/docs/plans/2026-03-09-fix-typescript-types.md +91 -0
  43. package/docs/plans/2026-03-09-test-coverage-gaps.md +83 -0
  44. package/docs/plans/2026-03-10-dispatcher-refactor.md +801 -0
  45. package/docs/trust-architecture.md +266 -0
  46. package/gateway.js +473 -0
  47. package/idempotency.js +119 -0
  48. package/index.d.ts +864 -0
  49. package/index.js +17 -0
  50. package/jobs.js +1224 -0
  51. package/messages.js +357 -0
  52. package/migrate-consolidate.js +694 -0
  53. package/migrate.js +125 -0
  54. package/package.json +130 -0
  55. package/paths.js +79 -0
  56. package/prompt-context.js +94 -0
  57. package/retrieval.js +176 -0
  58. package/runs.js +270 -0
  59. package/scheduler-schema.js +101 -0
  60. package/schema.sql +480 -0
  61. package/scripts/dispatch-cli-utils.mjs +65 -0
  62. package/scripts/inbox-consumer.mjs +288 -0
  63. package/scripts/stuck-detector.sh +18 -0
  64. package/scripts/stuck-run-detector.mjs +333 -0
  65. package/scripts/telegram-webhook-check.mjs +238 -0
  66. package/setup.mjs +724 -0
  67. package/shell-result.js +214 -0
  68. package/task-tracker.js +300 -0
  69. package/team-adapter.js +335 -0
  70. package/v02-runtime.js +599 -0
@@ -0,0 +1,1836 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * dispatch -- Sub-agent dispatch CLI for OpenClaw
4
+ *
5
+ * Spawns and steers isolated agent sessions via the OpenClaw Gateway API.
6
+ * Tracks label->session mappings in a local JSON ledger.
7
+ *
8
+ * Subcommands:
9
+ * enqueue Spawn a session via gateway, store label->sessionKey, return immediately
10
+ * status Query session status by label
11
+ * stuck Find sessions running past threshold with no activity
12
+ * result Get last assistant message from a session
13
+ * send Send a message INTO a running session (mid-session steering)
14
+ * steer Alias for send -- explicitly for mid-session course correction
15
+ * heartbeat Check session liveness
16
+ * list List all tracked labels
17
+ * sync Reconcile labels.json with sessions store state
18
+ * done Agent-side completion signal -- set label status=done immediately
19
+ *
20
+ * Exit codes:
21
+ * 0 -- success / nothing stuck
22
+ * 1 -- stuck runs found, or hard error
23
+ * 2 -- argument error
24
+ *
25
+ * Usage: openclaw-scheduler <subcommand> [options]
26
+ */
27
+
28
+ import { readFileSync, writeFileSync, existsSync, statSync, openSync, readSync, closeSync, renameSync } from 'fs';
29
+ import { dirname, join, resolve as pathResolve } from 'path';
30
+ import { fileURLToPath } from 'url';
31
+ import { randomUUID } from 'crypto';
32
+ import { execFileSync } from 'child_process';
33
+ import { homedir } from 'os';
34
+ import Database from 'better-sqlite3';
35
+ import { onStarted, onFinished, onStuck } from './hooks.mjs';
36
+
37
// Directory of this module file, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Home directory: $HOME takes precedence, os.homedir() is the fallback.
const HOME_DIR = process.env.HOME || homedir();

// Gateway base URL; OPENCLAW_GATEWAY_URL overrides the loopback default.
const GATEWAY_URL = process.env.OPENCLAW_GATEWAY_URL || 'http://127.0.0.1:18789';

// In-process copy of the labels ledger and the file signature it was loaded at.
let labelsCache = null;
let labelsCacheSignature = null;
42
+
43
// -- Invocation Directory -------------------------------------
// A branded wrapper may invoke this module through a symlink
// (e.g. my-brand/index.mjs -> dispatch/index.mjs). __dirname then points at the
// shared dispatch/ directory, whereas INVOKE_DIR is the directory of the path
// given in process.argv[1], so config.json, labels.json, and self-references
// are looked up next to the wrapper rather than next to the shared module.

const INVOKE_DIR = (() => {
  const scriptPath = process.argv[1];
  try {
    if (scriptPath) {
      return dirname(pathResolve(scriptPath));
    }
  } catch {
    // Fall back to the module directory below.
  }
  return __dirname;
})();
56
+
57
+ // -- Config ---------------------------------------------------
58
+
59
// Ledger file location: DISPATCH_LABELS_PATH overrides the default labels.json in INVOKE_DIR.
const LABELS_PATH = process.env.DISPATCH_LABELS_PATH
  ? process.env.DISPATCH_LABELS_PATH
  : join(INVOKE_DIR, 'labels.json');
60
+
61
/**
 * Load dispatch config from config.json.
 *
 * Directories are tried in this order; the first config.json that parses to a
 * plain object wins:
 *   1. DISPATCH_CONFIG_DIR env var (branded wrapper deployments)
 *   2. INVOKE_DIR (argv[1] dirname -- supports symlink-based branding)
 *   3. __dirname (dispatch module directory -- fallback)
 *
 * A missing config.json is skipped silently. A config.json that exists but
 * cannot be read or parsed, or whose top-level value is not an object, is
 * skipped with a warning on stderr so a broken config does not go unnoticed.
 *
 * @returns {Object} The parsed config, or {} when no usable config.json exists.
 */
function loadConfig() {
  const searchDirs = [];
  if (process.env.DISPATCH_CONFIG_DIR) searchDirs.push(pathResolve(process.env.DISPATCH_CONFIG_DIR));
  if (!searchDirs.includes(INVOKE_DIR)) searchDirs.push(INVOKE_DIR);
  if (!searchDirs.includes(__dirname)) searchDirs.push(__dirname);

  // NOTE: BRAND is derived from this function's result, so warnings use a fixed prefix.
  const warn = (cfgPath, why) => {
    process.stderr.write(`[dispatch] ignoring ${cfgPath}: ${why}\n`);
  };

  for (const dir of searchDirs) {
    const cfgPath = join(dir, 'config.json');
    try {
      const parsed = JSON.parse(readFileSync(cfgPath, 'utf-8'));
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        return parsed;
      }
      // e.g. `null` would otherwise crash the first `config.name` access.
      warn(cfgPath, 'top-level value is not an object');
    } catch (err) {
      // Absence is expected (try next); anything else deserves a diagnostic.
      if (err?.code !== 'ENOENT') warn(cfgPath, err?.message ?? String(err));
    }
  }
  return {};
}
81
+
82
// Effective configuration (may be an empty object) and the prefix used in log lines.
const config = loadConfig();
const { name: configuredBrand } = config;
const BRAND = configuredBrand ?? 'dispatch';
84
+
85
/**
 * Resolve the gateway auth token.
 *
 * OPENCLAW_GATEWAY_TOKEN wins when set; otherwise gateway.auth.token is read
 * from ~/.openclaw/openclaw.json.
 *
 * @returns {string|null} The token, or null when neither source yields one.
 */
function getGatewayToken() {
  const fromEnv = process.env.OPENCLAW_GATEWAY_TOKEN;
  if (fromEnv) return fromEnv;

  let parsed;
  try {
    const raw = readFileSync(join(HOME_DIR, '.openclaw', 'openclaw.json'), 'utf-8');
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  return parsed?.gateway?.auth?.token || null;
}
94
+
95
// Resolved once at module load; null when no token is configured.
const GATEWAY_TOKEN = getGatewayToken();
96
+
97
+ // -- Helpers --------------------------------------------------
98
+
99
/**
 * Write a brand-prefixed message to stderr and terminate the process.
 *
 * @param {string} msg - Text to print after the "[BRAND] " prefix.
 * @param {number} [code=1] - Process exit code.
 */
function die(msg, code = 1) {
  const line = `[${BRAND}] ${msg}\n`;
  process.stderr.write(line);
  process.exit(code);
}
103
+
104
/**
 * Print a value to stdout as 2-space-indented JSON followed by a newline.
 *
 * @param {*} obj - Value to serialize.
 */
function out(obj) {
  const pretty = JSON.stringify(obj, null, 2);
  process.stdout.write(`${pretty}\n`);
}
107
+
108
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
111
+
112
/**
 * Parse --flag options from an argv slice.
 *
 * Supported shapes:
 *   --flag=value   -> { flag: 'value' } (everything after the first '=')
 *   --flag value   -> { flag: 'value' } when the next token is non-empty and
 *                     does not itself start with '--'
 *   --flag         -> { flag: true }
 * Tokens that do not start with '--' and are not consumed as a value are ignored.
 *
 * @param {string[]} argv - Arguments to parse.
 * @returns {Object<string, string|true>} Map of flag name to value.
 */
function parseFlags(argv) {
  const flags = {};
  let idx = 0;
  while (idx < argv.length) {
    const token = argv[idx];
    idx += 1;
    if (!token.startsWith('--')) continue;

    const eqPos = token.indexOf('=');
    if (eqPos > 0) {
      flags[token.slice(2, eqPos)] = token.slice(eqPos + 1);
      continue;
    }

    const name = token.slice(2);
    const candidate = argv[idx];
    if (candidate && !candidate.startsWith('--')) {
      flags[name] = candidate;
      idx += 1; // the value token has been consumed
    } else {
      flags[name] = true;
    }
  }
  return flags;
}
132
+
133
/**
 * Decide whether a task prompt mentions a history-publishing git operation
 * (git push / rebase / cherry-pick, --force-with-lease, --force-push) that is
 * not immediately preceded by a negation such as "do not use", "never",
 * "avoid using" or "without".
 *
 * Only the 40 characters before each match are inspected for the negation.
 *
 * @param {*} taskPrompt - Prompt text; non-strings and empty strings yield false.
 * @returns {boolean} true when at least one non-negated mention is found.
 */
function taskRequiresGitSha(taskPrompt) {
  if (typeof taskPrompt !== 'string' || taskPrompt.length === 0) return false;

  const riskyCommand = /\bgit\s+(push|rebase|cherry-pick)\b|(?:^|\s)--force-with-lease\b|(?:^|\s)--force-push\b/ig;
  const negations = [
    /\b(?:do\s+not|don't|dont|never)\s+(?:use|run|call|invoke)?\s*$/i,
    /\bavoid\s+(?:using\s+)?$/i,
    /\bwithout\s+(?:using\s+)?$/i,
  ];

  for (const hit of taskPrompt.matchAll(riskyCommand)) {
    const lead = taskPrompt.slice(Math.max(0, hit.index - 40), hit.index);
    const negated = negations.some((re) => re.test(lead));
    if (!negated) return true;
  }
  return false;
}
147
+
148
+ // -- Labels Ledger --------------------------------------------
149
+
150
/**
 * Build a cheap change-detection signature for the labels file.
 *
 * @returns {string} "<mtimeMs>:<size>" of LABELS_PATH, or 'missing' when the
 *   file cannot be stat'ed.
 */
function getLabelsSignature() {
  let info;
  try {
    info = statSync(LABELS_PATH);
  } catch {
    return 'missing';
  }
  return `${info.mtimeMs}:${info.size}`;
}
158
+
159
/**
 * Return the labels ledger, re-reading LABELS_PATH only when its signature
 * (see getLabelsSignature) differs from the one the cache was loaded at.
 *
 * When the file cannot be read or parsed the cache is reset to an empty
 * object with signature 'missing'.
 *
 * @returns {Object} The cached ledger object (shared reference, not a copy).
 */
function loadLabels() {
  const currentSig = getLabelsSignature();
  const cacheIsFresh = Boolean(labelsCache) && labelsCacheSignature === currentSig;
  if (cacheIsFresh) return labelsCache;

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(LABELS_PATH, 'utf-8'));
  } catch {
    labelsCache = {};
    labelsCacheSignature = 'missing';
    return labelsCache;
  }

  labelsCache = parsed;
  labelsCacheSignature = currentSig;
  return parsed;
}
175
+
176
/**
 * Persist the ledger: write it to a pid-suffixed temp file, rename that over
 * LABELS_PATH, then refresh the in-memory cache and its signature.
 *
 * @param {Object} labels - Full ledger object to store.
 */
function saveLabels(labels) {
  const tmpPath = `${LABELS_PATH}.tmp.${process.pid}`;
  const serialized = `${JSON.stringify(labels, null, 2)}\n`;

  writeFileSync(tmpPath, serialized);
  renameSync(tmpPath, LABELS_PATH);

  labelsCache = labels;
  labelsCacheSignature = getLabelsSignature();
}
183
+
184
/**
 * Load the ledger, let `mutator` edit it in place, and save the result.
 *
 * @param {(labels: Object) => (boolean|void)} mutator - Receives the ledger;
 *   returning exactly `false` skips the save.
 * @returns {Object} The (possibly mutated) ledger.
 */
function mutateLabels(mutator) {
  const ledger = loadLabels();
  if (mutator(ledger) === false) return ledger;
  saveLabels(ledger);
  return ledger;
}
192
+
193
/**
 * Look up one ledger entry.
 *
 * @param {string} name - Label name.
 * @returns {Object|null} The entry, or null when absent (or falsy).
 */
function getLabel(name) {
  const entry = loadLabels()[name];
  return entry || null;
}
196
+
197
/**
 * Merge `data` into the ledger entry for `name`, stamp `updatedAt` with the
 * current ISO time, and persist the ledger.
 *
 * @param {string} name - Label name.
 * @param {Object} data - Fields to shallow-merge over the existing entry.
 * @returns {Object} The stored entry after the merge.
 */
function setLabel(name, data) {
  const applyUpdate = (ledger) => {
    ledger[name] = { ...ledger[name], ...data, updatedAt: new Date().toISOString() };
  };
  return mutateLabels(applyUpdate)[name];
}
203
+
204
+ // -- Gateway Calls --------------------------------------------
205
+
206
/**
 * Invoke a gateway RPC method by running `openclaw gateway call <method> --json`
 * synchronously and parsing its stdout as JSON.
 *
 * Anything printed before the first '{' on stdout is discarded before parsing.
 * If the child fails but its stdout still contains parseable JSON, that JSON
 * is returned instead of throwing.
 *
 * @param {string} method - RPC method name.
 * @param {Object} [params={}] - Serialized and passed via --params.
 * @param {{ timeout?: number, expectFinal?: boolean }} [opts={}] - `timeout` in ms
 *   (default 15000; the child process is allowed 5000ms more), `expectFinal`
 *   adds --expect-final.
 * @returns {*} Parsed JSON response.
 * @throws {Error} When the call fails and no JSON can be recovered; the
 *   original error is attached as `cause`.
 */
function gatewayCall(method, params = {}, opts = {}) {
  const timeout = opts.timeout || 15000;
  const expectFinal = opts.expectFinal || false;

  // Drop non-JSON prefix text (e.g. plugin init logs leaking to stdout).
  const fromFirstBrace = (text) => {
    const at = text.indexOf('{');
    return at > 0 ? text.slice(at) : text;
  };

  const args = [
    'gateway', 'call', method, '--json',
    '--params', JSON.stringify(params),
    '--timeout', String(timeout),
    ...(expectFinal ? ['--expect-final'] : []),
  ];
  const childEnv = GATEWAY_TOKEN ? { ...process.env, OPENCLAW_GATEWAY_TOKEN: GATEWAY_TOKEN } : process.env;

  try {
    const raw = execFileSync('openclaw', args, {
      encoding: 'utf-8',
      timeout: timeout + 5000,
      stdio: ['pipe', 'pipe', 'pipe'],
      env: childEnv,
    });
    return JSON.parse(fromFirstBrace(raw.trim()));
  } catch (err) {
    const stderr = err.stderr?.trim() || '';
    const stdout = err.stdout?.trim() || '';
    if (stdout) {
      try {
        return JSON.parse(fromFirstBrace(stdout));
      } catch {
        // stdout was not JSON either; report the failure below.
      }
    }
    throw new Error(`gateway call ${method} failed: ${stderr || stdout || err.message}`, {
      cause: err,
    });
  }
}
245
+
246
+ // -- Gateway Error Log Check ----------------------------------
247
+
248
/**
 * Check the gateway error log for 529/FailoverError/overload errors
 * matching a specific session key.
 *
 * Scans at most the last 512KB of ~/.openclaw/logs/gateway.err.log, newest
 * line first, for a line that contains the session key, the text
 * "lane task error", and an error="..." field matching an overload pattern.
 *
 * Never throws: any I/O problem is reported as "not found".
 *
 * @param {string} sessionKey - The session key to check. Empty or non-string
 *   keys are rejected up front because String.prototype.includes('') is true
 *   for every line.
 * @returns {{ found: boolean, error: string|null, timestamp: string|null }}
 */
function check529InGatewayLog(sessionKey) {
  const OVERLOAD_PATTERNS = [
    /529/i,
    /failover\s*error/i,
    /overload/i,
    /temporarily\s+overloaded/i,
  ];
  const TAIL_BYTES = 512 * 1024;
  const notFound = () => ({ found: false, error: null, timestamp: null });

  if (typeof sessionKey !== 'string' || sessionKey === '') return notFound();

  try {
    const logPath = join(HOME_DIR, '.openclaw', 'logs', 'gateway.err.log');
    if (!existsSync(logPath)) return notFound();

    // Read only the tail of the log (sufficient for recent errors).
    const fileStat = statSync(logPath);
    const readSize = Math.min(fileStat.size, TAIL_BYTES);
    const buf = Buffer.alloc(readSize);
    const fd = openSync(logPath, 'r');
    let bytesRead = 0;
    try {
      bytesRead = readSync(fd, buf, 0, readSize, Math.max(0, fileStat.size - readSize));
    } finally {
      // Always release the descriptor, even when the read fails.
      closeSync(fd);
    }

    // Decode only what was actually read (the file may have shrunk since stat).
    const lines = buf.toString('utf-8', 0, bytesRead).split('\n');

    // Search backwards for the most recent match.
    for (let i = lines.length - 1; i >= 0; i--) {
      const line = lines[i];
      if (!line.includes(sessionKey)) continue;
      if (!line.includes('lane task error')) continue;

      const errorMatch = line.match(/error="([^"]+)"/);
      if (!errorMatch) continue;

      const errorMsg = errorMatch[1];
      if (OVERLOAD_PATTERNS.some((p) => p.test(errorMsg))) {
        const tsMatch = line.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)/);
        return {
          found: true,
          error: `FailoverError (529): ${errorMsg}`,
          timestamp: tsMatch ? tsMatch[1] : null,
        };
      }
    }

    return notFound();
  } catch {
    // Best-effort diagnostic: an unreadable log must not break the caller.
    return notFound();
  }
}
308
+
309
+ // -- Sessions Store (Direct Read) -----------------------------
310
+
311
/**
 * Read and parse an agent's sessions.json straight from disk
 * (~/.openclaw/agents/<agent>/sessions/sessions.json).
 *
 * Per the original notes, this file is treated as the ground truth for
 * session state: sessions spawned through the dispatcher HTTP agent endpoint
 * show up here but not in the sessions_list API, and completed sessions are
 * not pruned from it.
 *
 * @param {string} agent - Agent ID (default: 'main')
 * @returns {Object|null} - The parsed store, or null when it cannot be read or parsed
 */
function readSessionsStore(agent = 'main') {
  try {
    const storePath = join(HOME_DIR, '.openclaw', 'agents', agent, 'sessions', 'sessions.json');
    const raw = readFileSync(storePath, 'utf-8');
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
329
+
330
/**
 * Auto-detect the originating channel from the most recently active main session.
 *
 * Reads the 'main' agent's sessions store and considers keys that start with
 * "agent:main:" and do not contain ":subagent:". Among those updated within
 * the last 10 minutes that carry a deliveryContext.to value, the one with the
 * newest updatedAt wins.
 *
 * @returns {string|null} - deliveryContext.to of the winner
 *   (e.g. "telegram:-100200000000"), or null if none qualifies
 */
function getActiveOriginFromSessions() {
  const store = readSessionsStore('main');
  if (!store) return null;

  const RECENT_WINDOW_MS = 10 * 60 * 1000;

  // updatedAt may be epoch ms or a date string; a missing value counts as 0.
  const toMillis = (value) => {
    if (!value) return 0;
    return typeof value === 'number' ? value : new Date(value).getTime();
  };

  let winner = null;
  let winnerTime = 0;

  for (const key of Object.keys(store)) {
    // Main sessions only: agent:main:<channel>:<type>:<id>, never agent:main:subagent:*
    if (!key.startsWith('agent:main:') || key.includes(':subagent:')) continue;

    const session = store[key];
    const seenAt = toMillis(session.updatedAt);

    // Must be recently active.
    if (Date.now() - seenAt > RECENT_WINDOW_MS) continue;
    // Must beat the current best (also rejects NaN timestamps).
    if (!(seenAt > winnerTime)) continue;

    // Only sessions with a delivery target can supply an origin.
    const target = session.deliveryContext?.to || null;
    if (!target) continue;

    winnerTime = seenAt;
    winner = target;
  }

  return winner;
}
374
+
375
/**
 * Parse the agent ID from a session key.
 * Session key format: agent:{agentId}:...
 *
 * Falls back to 'main' for malformed keys: non-string or empty input, a first
 * segment other than "agent", or a missing/empty agent segment
 * (e.g. "agent::subagent:1").
 *
 * @param {*} sessionKey - Session key to inspect.
 * @returns {string} The agent ID, or 'main' when it cannot be determined.
 */
function agentFromSessionKey(sessionKey) {
  if (typeof sessionKey !== 'string' || sessionKey === '') return 'main';
  const [prefix, agentId] = sessionKey.split(':');
  // An empty agent segment would otherwise yield '' and break path lookups.
  if (prefix === 'agent' && agentId) return agentId;
  return 'main';
}
386
+
387
+ // -- Gateway Session State Check ------------------------------
388
+
389
/**
 * Determine if a session should be auto-resolved based on sessions.json state.
 *
 * Decision logic (in evaluation order):
 *   1. Store unavailable (null)                      -> do NOT resolve (safe default)
 *   2. Key not in store and session < 5 min old      -> do NOT resolve (may not have propagated yet)
 *   3. Key not in store but listed by the gateway
 *      sessions.list API                             -> do NOT resolve (still alive)
 *   4. Key not in store and gateway call fails       -> do NOT resolve (safe default)
 *   5. Key not in store and not listed by gateway    -> resolve
 *   6. Key in store, idle for >= thresholdMs         -> resolve
 *   7. Key in store with more recent activity        -> do NOT resolve
 *
 * Whenever the result is "resolve", the gateway error log is consulted so a
 * 529/overload failure is reported via `is529` / `errorMsg` and in `reason`.
 *
 * @param {string} sessionKey - The session key to check
 * @param {Object|null} sessionsStore - Sessions.json object (null = unavailable)
 * @param {number} thresholdMs - Silence threshold in ms
 * @param {boolean} [sessionEverFound=true] - Whether the session was ever seen in the store.
 *   Pass false to get a distinct "spawn likely failed" reason instead of
 *   "session not found in sessions store or gateway API".
 * @param {number} [spawnedAtMs=0] - Timestamp (ms) when the session was spawned (0 = unknown)
 * @returns {{ shouldResolve: boolean, reason: string, lastActivity: number|null, is529?: boolean, errorMsg?: string }}
 */
function checkSessionDone(sessionKey, sessionsStore, thresholdMs, sessionEverFound = true, spawnedAtMs = 0) {
  if (sessionsStore === null) {
    // Store unavailable -- safe default is to NOT auto-resolve
    return {
      shouldResolve: false,
      reason: 'sessions store unavailable for state check',
      lastActivity: null,
    };
  }

  // Not in sessions store -> session never appeared or already cleaned up.
  // BUT: young sessions (<5 min old) may simply not have propagated yet,
  // especially right after a gateway restart. Don't auto-resolve those.
  const YOUNG_SESSION_MS = 5 * 60 * 1000;
  if (!sessionsStore[sessionKey]) {
    const ageMs = spawnedAtMs ? Date.now() - spawnedAtMs : Infinity;
    if (ageMs < YOUNG_SESSION_MS) {
      return {
        shouldResolve: false,
        reason: 'session young, not yet in sessions store -- deferring',
        lastActivity: null,
      };
    }

    // Gateway API fallback: in openclaw 2026.3.13+, subagent sessions are
    // tracked via SessionBindingService and are NOT written to sessions.json,
    // so absence from the store does not mean the session is gone.
    try {
      const listResult = gatewayCall('sessions.list', { activeMinutes: 1440 }, { timeout: 8000 });
      const liveSession = listResult?.sessions?.find((s) => s.key === sessionKey);
      if (liveSession) {
        // Session is alive in gateway -- do NOT auto-resolve
        return {
          shouldResolve: false,
          reason: 'session not in sessions.json but confirmed active via gateway API',
          lastActivity: liveSession.updatedAt || null,
        };
      }
    } catch {
      // Gateway unreachable -- safe default: do NOT auto-resolve
      return {
        shouldResolve: false,
        reason: 'sessions store miss + gateway API unreachable -- deferring',
        lastActivity: null,
      };
    }

    // Resolving: report a 529/overload failure as such rather than as plain completion.
    const logCheck = check529InGatewayLog(sessionKey);
    return {
      shouldResolve: true,
      reason: logCheck.found
        ? `529/overload error detected: ${logCheck.error}`
        : sessionEverFound
          ? 'session not found in sessions store or gateway API'
          : 'session never found -- spawn likely failed',
      lastActivity: null,
      is529: logCheck.found,
      errorMsg: logCheck.error || null,
    };
  }

  // Session exists in store, check idle time.
  // updatedAt may be epoch ms or a date string (getActiveOriginFromSessions
  // accepts both); normalise to epoch ms so the subtraction below cannot yield
  // NaN and leave the session unresolved forever. Missing/unparseable -> 0.
  const entry = sessionsStore[sessionKey];
  const rawUpdatedAt = entry.updatedAt;
  const parsedUpdatedAt = typeof rawUpdatedAt === 'number'
    ? rawUpdatedAt
    : new Date(rawUpdatedAt).getTime();
  const lastActivity = Number.isFinite(parsedUpdatedAt) ? parsedUpdatedAt : 0;
  const silenceMs = Date.now() - lastActivity;

  if (silenceMs >= thresholdMs) {
    const logCheck = check529InGatewayLog(sessionKey);
    return {
      shouldResolve: true,
      reason: logCheck.found
        ? `529/overload error detected: ${logCheck.error}`
        : `session idle ${Math.round(silenceMs / 60000)}min in sessions store (completed)`,
      lastActivity,
      is529: logCheck.found,
      errorMsg: logCheck.error || null,
    };
  }

  // Session has recent activity -- might still be working
  return {
    shouldResolve: false,
    reason: 'session has recent activity in sessions store',
    lastActivity,
  };
}
498
+
499
+ // -- Watchdog Helpers -----------------------------------------
500
+
501
/**
 * Disable the watchdog job recorded on a label's ledger entry, if any, by
 * running `cli.js jobs disable <jobId>` from the parent directory.
 *
 * Best-effort: the outcome is written to stderr and failures never throw.
 *
 * @param {string} label - Label whose `watchdogJobId` should be disabled.
 */
function disarmWatchdog(label) {
  const jobId = getLabel(label)?.watchdogJobId;
  if (!jobId) return;

  const cliPath = join(__dirname, '..', 'cli.js');
  const spawnOpts = {
    encoding: 'utf-8',
    timeout: 5000,
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  try {
    execFileSync(process.execPath, [cliPath, 'jobs', 'disable', jobId], spawnOpts);
    process.stderr.write(`[${BRAND}] watchdog disarmed for ${label}\n`);
  } catch (err) {
    process.stderr.write(`[${BRAND}] watchdog disarm failed for ${label}: ${err.message}\n`);
  }
}
520
+
521
+ // -- Session Helpers ------------------------------------------
522
+
523
/**
 * Build a unique session key for a new subagent session.
 *
 * @param {string} agentId - Agent the session belongs to.
 * @returns {string} "agent:<agentId>:subagent:<random UUID>"
 */
function makeSessionKey(agentId) {
  const prefix = `agent:${agentId}:subagent:`;
  return `${prefix}${randomUUID()}`;
}
527
+
528
+ // -- Subcommands ----------------------------------------------
529
+
530
+ /**
531
+ * enqueue -- spawn a session via gateway API.
532
+ *
533
+ * Flags:
534
+ * --label <string> Required. Human-readable name
535
+ * --message <string> Required. Prompt sent to the agent
536
+ * --agent <string> Agent ID (default: main)
537
+ * --thinking <string> Reasoning level: low|high|xhigh (default: not set)
538
+ * --timeout <seconds> Run timeout in seconds (default: 300)
539
+ * --origin <origin> Required. Where the job was dispatched from (e.g. "telegram:<your-user-id>", "system")
540
+ * --deliver-to <target> Delivery target (e.g. Telegram chat ID). Enables deliver:true on the gateway call.
541
+ * Defaults to origin chat ID when --origin is a "telegram:<id>" string.
542
+ * --deliver-channel <ch> Delivery channel for --deliver-to (default: telegram)
543
+ * --delivery-mode <mode> announce|announce-always|none (default: announce)
544
+ * --mode <fresh|reuse>
545
+ * fresh -- always spawn new session (default)
546
+ * reuse -- look up prior session_key for this label, send into it
547
+ * --session-key <key> Explicit session key override
548
+ * --model <string> Model override (e.g. anthropic/claude-sonnet-4-6)
549
+ */
550
+ async function cmdEnqueue(flags) {
551
+ const label = flags.label;
552
+ let message = flags.message;
553
+ if (!label) die('--label is required', 2);
554
+ // Support --message-file for multiline prompts without shell escaping issues
555
+ if (!message && flags['message-file']) {
556
+ try {
557
+ message = readFileSync(flags['message-file'], 'utf-8').trim();
558
+ } catch (err) {
559
+ die(`--message-file: could not read file: ${err.message}`, 2);
560
+ }
561
+ }
562
+ if (!message) die('--message or --message-file is required', 2);
563
+
564
+ const agent = flags.agent || 'main';
565
+ const thinking = flags.thinking || null;
566
+ const timeoutS = parseInt(flags.timeout || '300', 10);
567
+ if (!Number.isFinite(timeoutS) || timeoutS <= 0) die('--timeout must be a positive integer', 2);
568
+ // Warn loudly when --timeout falls back to default -- silent fallback caused hard-to-debug
569
+ // watcher kills: the flag parser silently drops flags that appear after a multiline --message
570
+ // value in shell heredocs. Operator should always pass --timeout explicitly.
571
+ if (!flags.timeout) {
572
+ process.stderr.write(`[${BRAND}] WARNING: --timeout not specified, defaulting to 300s. ` +
573
+ `Pass --timeout explicitly (≥1200 for thinking=high tasks) to avoid premature watcher kills.\n`);
574
+ }
575
+ let origin = flags.origin || null;
576
+
577
+ // Auto-detect origin from active sessions if not explicitly provided
578
+ if (!origin) {
579
+ origin = getActiveOriginFromSessions();
580
+ if (origin) {
581
+ process.stderr.write(`[${BRAND}] auto-detected origin from active session: ${origin}\n`);
582
+ }
583
+ }
584
+
585
+ // -- Auto-derive deliver-to from origin ---------------------------------
586
+ // If origin is "telegram:<id>", use <id> as the default deliver-to target.
587
+ let defaultDeliverTo = null;
588
+ let defaultDeliverCh = 'telegram';
589
+ if (origin) {
590
+ const originMatch = /^([^:]+):(.+)$/.exec(origin);
591
+ if (originMatch) {
592
+ defaultDeliverCh = originMatch[1];
593
+ defaultDeliverTo = originMatch[2];
594
+ }
595
+ }
596
+
597
+ const deliverTo = flags['deliver-to'] || defaultDeliverTo;
598
+ const deliverChannel = flags['deliver-channel'] || defaultDeliverCh || 'telegram';
599
+ const deliverMode = flags['delivery-mode'] || 'announce';
600
+ const mode = flags.mode || 'fresh';
601
+
602
+ // -- Verify command flag -----------------------------------
603
+ const verifyCmd = flags['verify-cmd'] || null;
604
+
605
+ // -- Watchdog monitoring flags -----------------------------
606
+ const noMonitorRaw = flags['no-monitor'];
607
+ const noMonitor = !!noMonitorRaw;
608
+ const monitorEnabled = !noMonitor && flags.monitor !== 'false';
609
+ const monitorInterval = flags['monitor-interval'] || config.watchdogIntervalCron || '*/15 * * * *';
610
+ const monitorTimeout = parseInt(flags['monitor-timeout'] || String(config.watchdogTimeoutMin ?? 60), 10);
611
+ if (!Number.isFinite(monitorTimeout) || monitorTimeout <= 0) die('--monitor-timeout must be a positive integer', 2);
612
+
613
+ // -- Delivery enforcement for agentTurn jobs -----------------
614
+ // agentTurn jobs must have a delivery target OR explicitly opt out via --no-monitor "<reason>"
615
+ const isAgentTurn = !flags['payload-kind'] || flags['payload-kind'] === 'agentTurn';
616
+ if (isAgentTurn && !deliverTo && !noMonitor) {
617
+ die(
618
+ "REJECTED: --deliver-to is required for dispatch jobs.\n" +
619
+ "Pass --deliver-to <chat_id> (e.g. --deliver-to -100200000000 for a group, " +
620
+ "or --deliver-to 123456789 for a DM).\n" +
621
+ "Alternatively, pass --origin telegram:<chat_id> to auto-derive the delivery target.\n" +
622
+ "Pass --no-monitor \"<reason>\" only if you explicitly want to skip delivery (audit trail required).",
623
+ 2
624
+ );
625
+ }
626
+
627
+ // Dynamic branding: resolve per-agent brand name
628
+ const agentBrand = config.agents?.[agent]?.name || (agent !== 'main' ? agent : null) || config.name || 'dispatch';
629
+ const model = flags.model || null;
630
+
631
+ // -- Session key resolution ----------------------------------
632
+ let sessionKey = flags['session-key'] || null;
633
+
634
+ if (!sessionKey && mode === 'reuse') {
635
+ const existing = getLabel(label);
636
+ if (existing?.sessionKey) {
637
+ sessionKey = existing.sessionKey;
638
+ process.stderr.write(`[${agentBrand}] mode=reuse -> continuing session ${sessionKey}\n`);
639
+ } else {
640
+ die(`mode=reuse: no prior session found for label "${label}". Use --mode fresh.`);
641
+ }
642
+ }
643
+
644
+ const isFresh = !sessionKey;
645
+ if (isFresh) {
646
+ sessionKey = makeSessionKey(agent);
647
+ }
648
+
649
+ const idem = randomUUID();
650
+
651
+ // -- Patch session (model, thinking, spawnDepth) if fresh ----
652
+ if (isFresh) {
653
+ try {
654
+ gatewayCall('sessions.patch', { key: sessionKey, spawnDepth: 1 }, { timeout: 10000 });
655
+ } catch (err) {
656
+ die(`sessions.patch (spawnDepth) failed: ${err.message}`);
657
+ }
658
+
659
+ if (model) {
660
+ try {
661
+ gatewayCall('sessions.patch', { key: sessionKey, model }, { timeout: 10000 });
662
+ } catch (err) {
663
+ die(`sessions.patch (model) failed: ${err.message}`);
664
+ }
665
+ }
666
+
667
+ if (thinking) {
668
+ try {
669
+ gatewayCall('sessions.patch', {
670
+ key: sessionKey,
671
+ thinkingLevel: thinking === 'off' ? null : thinking,
672
+ }, { timeout: 10000 });
673
+ } catch (err) {
674
+ process.stderr.write(`[${agentBrand}] sessions.patch (thinking) warning: ${err.message}\n`);
675
+ }
676
+ }
677
+ }
678
+
679
+ // -- Build the task message ----------------------------------
680
+ const parts = [
681
+ `[Subagent Context] You are running as a subagent (depth 1/3). Results auto-announce to your requester; do not busy-poll for status.`,
682
+ ``,
683
+ ];
684
+
685
+ // -- Checkpoint notify command (mid-run status messages) -----
686
+ // Agents can call this command at logical checkpoints to send status updates
687
+ // that will be delivered to the inbox consumer (and ultimately Telegram).
688
+ const schedulerCliPath = join(__dirname, '..', 'cli.js');
689
+ const checkpointNotifyCmd = `node '${schedulerCliPath}' messages send --from '${label.replace(/'/g, "'\\''")}' --to main --kind status --body`;
690
+ // TODO: Inject CHECKPOINT_NOTIFY_CMD as an env var into the agent session so
691
+ // agents can discover the checkpoint command programmatically (not just from
692
+ // the prompt text at line ~714). Depends on the gateway implementing the
693
+ // x-openclaw-env-inject receiver (PR #5 sends the header, gateway ignores it
694
+ // until receiver support lands). Once available, pass it alongside materialized
695
+ // credentials via the env-inject header in the gatewayCall('agent', ...) below.
696
+
697
+ // Prepend CHECK_IN template when delivery target is set
698
+ if (deliverTo) {
699
+ parts.push(`---`);
700
+ parts.push(`CHECK_IN: To report progress, use curl:`);
701
+ parts.push(`GW_TOKEN=$(node -e "process.stdout.write(JSON.parse(require('fs').readFileSync(require('os').homedir()+'/.openclaw/openclaw.json','utf8')).gateway.auth.token)")`);
702
+ // Sanitize values for safe embedding in JSON inside a shell single-quoted string
703
+ const safeJson = (v) => String(v || '').replace(/[\\'"\n\r]/g, '');
704
+ const safeChannel = safeJson(deliverChannel || 'telegram');
705
+ const safeTarget = safeJson(deliverTo);
706
+ const safeLabel = safeJson(label);
707
+ parts.push(`curl -s -X POST ${GATEWAY_URL}/tools/invoke -H 'Content-Type: application/json' -H "Authorization: Bearer $GW_TOKEN" -d '{"tool":"message","args":{"action":"send","channel":"${safeChannel}","target":"${safeTarget}","message":"[${safeLabel}] <your status here>"},"sessionKey":"main"}'`);
708
+ parts.push(`Call this every ~5 minutes with a brief progress update.`);
709
+ parts.push(`---`);
710
+ parts.push(``);
711
+ }
712
+
713
+ parts.push(`[Subagent Task]: ${message}`);
714
+
715
+ // -- Checkpoint notify instructions ---------------------------
716
+ parts.push(``);
717
+ parts.push(`---`);
718
+ parts.push(`CHECKPOINT MESSAGING: You can send mid-run status updates using this command:`);
719
+ parts.push(` ${checkpointNotifyCmd} "<message>"`);
720
+ parts.push(`Call this at logical checkpoints: start of a major step, on conflict/error, before completing.`);
721
+ parts.push(`Example: ${checkpointNotifyCmd} "Starting step 2: running tests"`);
722
+ parts.push(`(Environment variable CHECKPOINT_NOTIFY_CMD is set to: ${checkpointNotifyCmd})`);
723
+ parts.push(`---`);
724
+ parts.push(``);
725
+
726
+ // Append agent-side done signal instructions (Fix 2 -- push-based completion)
727
+ // Always point to dispatch/index.mjs (__dirname) -- the canonical done handler.
728
+ const doneScriptPath = join(__dirname, 'index.mjs');
729
+ parts.push(``);
730
+ parts.push(`---`);
731
+ parts.push(`COMPLETION SIGNAL -- READ CAREFULLY:`);
732
+ parts.push(``);
733
+ parts.push(`Only call this command after ALL of the following are true:`);
734
+ parts.push(` 1. All file edits are saved`);
735
+ parts.push(` 2. All commits are pushed (git push completed successfully)`);
736
+ parts.push(` 3. All API calls (e.g. GitHub comment replies) are done`);
737
+ parts.push(` 4. You have verified the work is complete`);
738
+ parts.push(``);
739
+ parts.push(`Call this as your ABSOLUTE FINAL action -- nothing else runs after this:`);
740
+ parts.push(` node '${doneScriptPath}' done --label '${label.replace(/'/g, "'\\''")}' \\`);
741
+ parts.push(` --summary "<what you actually did>" \\`);
742
+ parts.push(` --checklist '{"work_complete":true,"tests_passed":true,"pushed":true}' \\`);
743
+ parts.push(` [--sha "<git commit SHA if applicable>"]`);
744
+ parts.push(``);
745
+ parts.push(`Checklist rules:`);
746
+ parts.push(` - work_complete MUST be true -- you are asserting you have finished ALL assigned work`);
747
+ parts.push(` - If tests failed or push failed, do NOT set tests_passed:true or pushed:true -- instead continue working`);
748
+ parts.push(` - Only include tests_passed/pushed if they apply to your task`);
749
+ parts.push(`If your task involved git commits, --sha is required and must be the actual SHA of your pushed commit. The done script will reject invented or placeholder SHAs.`);
750
+ parts.push(`Do NOT call done while planning, reading files, or mid-task. If you have not yet pushed a commit, you are not done.`);
751
+ parts.push(`---`);
752
+ parts.push(``);
753
+ parts.push(`---`);
754
+ parts.push(`DELIVERY RULE: Do NOT use the message tool, sessions_send, or any direct messaging to send updates or results to Telegram or any chat. Do NOT reference chat IDs, user IDs, or delivery targets in your work.`);
755
+ parts.push(`Your ONLY output channel is the done signal above. The scheduler handles delivery automatically.`);
756
+ if (origin) {
757
+ parts.push(`Note: This job will be delivered to origin channel: ${origin}`);
758
+ }
759
+ parts.push(`---`);
760
+
761
+ const taskMessage = parts.join('\n');
762
+
763
+ // -- Call gateway agent method -------------------------------
764
+ // Gateway deliver is used as a fast-path secondary. The scheduler watcher
765
+ // (created below) is the primary delivery path with retry + audit trail.
766
+ // Both may fire -- at-least-once semantics, duplicates acceptable.
767
+ try {
768
+ const response = gatewayCall('agent', {
769
+ message: taskMessage,
770
+ sessionKey,
771
+ idempotencyKey: idem,
772
+ deliver: !!deliverTo,
773
+ lane: 'subagent',
774
+ timeout: timeoutS,
775
+ label: label,
776
+ thinking: thinking || undefined,
777
+ ...(deliverTo ? {
778
+ channel: deliverChannel,
779
+ replyTo: deliverTo,
780
+ replyChannel: deliverChannel,
781
+ } : {}),
782
+ }, { timeout: 15000 });
783
+
784
+ // Update ledger
785
+ setLabel(label, {
786
+ sessionKey,
787
+ runId: response?.runId || idem,
788
+ agent,
789
+ mode: isFresh ? 'fresh' : 'reuse',
790
+ model: model || null,
791
+ thinking,
792
+ origin: origin || null,
793
+ deliverTo: deliverTo || null,
794
+ deliverChannel: deliverChannel || null,
795
+ deliveryMode: deliverMode || null,
796
+ verifyCmd: verifyCmd || null,
797
+ spawnedAt: new Date().toISOString(),
798
+ timeoutSeconds: timeoutS,
799
+ // Fix 4: Store timeout so cmdDone threshold logic can use it correctly.
800
+ timeout: timeoutS,
801
+ status: 'running',
802
+ summary: null,
803
+ error: null,
804
+ // Store task prompt for gate checks in done (first 2000 chars)
805
+ taskPrompt: message.slice(0, 2000),
806
+ });
807
+
808
+ // Fire dispatch.started hook (best-effort)
809
+ await onStarted({
810
+ label, job_id: idem, run_id: response?.runId || idem,
811
+ agent, mode, session_key: sessionKey,
812
+ }).catch(() => {});
813
+
814
+ // -- Send "Starting" notification via gateway HTTP API -----
815
+ if (deliverTo && GATEWAY_TOKEN) {
816
+ try {
817
+ await fetch(`${GATEWAY_URL}/tools/invoke`, {
818
+ method: 'POST',
819
+ headers: {
820
+ 'Content-Type': 'application/json',
821
+ 'Authorization': `Bearer ${GATEWAY_TOKEN}`,
822
+ },
823
+ body: JSON.stringify({
824
+ tool: 'message',
825
+ args: {
826
+ action: 'send',
827
+ channel: deliverChannel,
828
+ target: deliverTo,
829
+ message: `🌶️ *${agentBrand}* [${label}] starting...`,
830
+ },
831
+ sessionKey: 'main',
832
+ }),
833
+ signal: AbortSignal.timeout(5000),
834
+ });
835
+ } catch (err) {
836
+ process.stderr.write(`[${agentBrand}] starting notification failed: ${err.message}\n`);
837
+ }
838
+ }
839
+
840
+ // -- Register scheduler watcher for delivery ---------------
841
+ // Creates a one-shot shell job that runs watcher.mjs (blocks until session
842
+ // completes, outputs result). The scheduler's handleDelivery delivers with
843
+ // retry, alias resolution, and audit trail in scheduler.db.
844
+ // Gateway deliver:true is kept as a fast-path secondary (see deliver flag above).
845
+ const sq = s => String(s).replace(/'/g, "'\\''");
846
+ let schedulerWatcherOk = false;
847
+ if (deliverTo && deliverMode !== 'none') {
848
+ try {
849
+ const watcherPath = join(__dirname, 'watcher.mjs');
850
+ // Watcher timeout = session timeout + 120s buffer for startup/polling
851
+ const watcherTimeoutS = timeoutS + 120;
852
+ const watcherCmd = `DISPATCH_LABELS_PATH='${sq(LABELS_PATH)}' '${sq(process.execPath)}' '${sq(watcherPath)}' --label '${sq(label)}' --timeout ${watcherTimeoutS} --poll-interval 20`;
853
+
854
+ const nowUtc = new Date().toISOString().replace('T', ' ').slice(0, 19);
855
+ const jobSpec = JSON.stringify({
856
+ name: `${agentBrand}-deliver:${label}`,
857
+ schedule_kind: 'at',
858
+ schedule_at: nowUtc,
859
+ session_target: 'shell',
860
+ payload_kind: 'shellCommand',
861
+ payload_message: watcherCmd,
862
+ delivery_mode: 'announce-always',
863
+ delivery_channel: deliverChannel,
864
+ delivery_to: deliverTo,
865
+ delivery_guarantee: 'at-least-once',
866
+ ttl_hours: config.deliver_watcher_ttl_hours ?? 48, // configurable TTL (deliver_watcher_ttl_hours); default 48h
867
+ overlap_policy: 'skip',
868
+ // Shell ceiling = max(initial timeout, rolling extension cap) + headroom.
869
+ // The watcher can extend its deadline up to MAX_DEADLINE_EXTENSION (4h) on
870
+ // activity (token growth / JSONL mtime). Headroom covers 2*FLAT_WINDOW + slop.
871
+ // Watcher constants: FLAT_WINDOW_MS=180s, MAX_DEADLINE_EXTENSION=4h.
872
+ run_timeout_ms: Math.max(watcherTimeoutS, 4 * 3600) * 1000
873
+ + 420 * 1000, // +7min headroom (2*FLAT_WINDOW + 1min slop)
874
+ delete_after_run: 1, // auto-delete after watcher completes
875
+ origin: origin || 'system',
876
+ });
877
+ const schedulerCli = join(__dirname, '..', 'cli.js');
878
+ execFileSync(process.execPath, [schedulerCli, 'jobs', 'add', jobSpec], {
879
+ encoding: 'utf-8',
880
+ timeout: 10000,
881
+ stdio: ['pipe', 'pipe', 'pipe'],
882
+ });
883
+ schedulerWatcherOk = true;
884
+ process.stderr.write(`[${agentBrand}] scheduler watcher registered: ${agentBrand}-deliver:${label}\n`);
885
+ } catch (err) {
886
+ process.stderr.write(`[${agentBrand}] scheduler watcher FAILED (gateway fallback active): ${err.message}\n`);
887
+ }
888
+ }
889
+
890
+ // -- Register watchdog monitoring job ---------------------
891
+ let watchdogJobOk = false;
892
+ let watchdogJobId = null;
893
+ if (monitorEnabled && deliverTo) {
894
+ try {
895
+ const checkCmd = `'${sq(process.execPath)}' '${sq(join(__dirname, 'index.mjs'))}' stuck --label '${sq(label)}' --threshold-min ${monitorTimeout}`;
896
+ const alertChannel = deliverChannel || 'telegram';
897
+ const alertTarget = deliverTo;
898
+ const watchdogSpec = JSON.stringify({
899
+ name: `watchdog:${label}`,
900
+ job_type: 'watchdog',
901
+ schedule_cron: monitorInterval,
902
+ session_target: 'shell',
903
+ payload_kind: 'shellCommand',
904
+ payload_message: checkCmd,
905
+ delivery_mode: 'none',
906
+ run_timeout_ms: 120_000, // 2 min: watchdog shell check should be fast
907
+ watchdog_target_label: label,
908
+ watchdog_check_cmd: checkCmd,
909
+ watchdog_timeout_min: monitorTimeout,
910
+ watchdog_alert_channel: alertChannel,
911
+ watchdog_alert_target: alertTarget,
912
+ watchdog_self_destruct: 1,
913
+ watchdog_started_at: new Date().toISOString(),
914
+ delete_after_run: 1, // auto-delete after watchdog fires
915
+ origin: origin || 'system',
916
+ });
917
+ const schedulerCli = join(__dirname, '..', 'cli.js');
918
+ const addResult = execFileSync(process.execPath, [schedulerCli, 'jobs', 'add', watchdogSpec, '--watchdog', '--json'], {
919
+ encoding: 'utf-8',
920
+ timeout: 10000,
921
+ stdio: ['pipe', 'pipe', 'pipe'],
922
+ });
923
+ try {
924
+ const parsed = JSON.parse(addResult.trim());
925
+ watchdogJobId = parsed?.job?.id || null;
926
+ } catch {}
927
+ watchdogJobOk = true;
928
+
929
+ // Store watchdog job ID in labels ledger for later cleanup
930
+ if (watchdogJobId) {
931
+ setLabel(label, { watchdogJobId });
932
+ }
933
+
934
+ process.stderr.write(`[${agentBrand}] watchdog registered: ${monitorInterval}, timeout: ${monitorTimeout}min\n`);
935
+ } catch (err) {
936
+ process.stderr.write(`[${agentBrand}] watchdog registration FAILED: ${err.message}\n`);
937
+ }
938
+ }
939
+
940
+ out({
941
+ ok: true,
942
+ label,
943
+ sessionKey,
944
+ runId: response?.runId || idem,
945
+ mode: isFresh ? 'fresh' : 'reuse',
946
+ agent,
947
+ status: 'accepted',
948
+ delivery: deliverTo ? {
949
+ scheduler: schedulerWatcherOk,
950
+ gateway: !!deliverTo,
951
+ target: deliverTo,
952
+ channel: deliverChannel,
953
+ } : null,
954
+ watchdog: monitorEnabled ? {
955
+ enabled: watchdogJobOk,
956
+ jobId: watchdogJobId,
957
+ interval: monitorInterval,
958
+ timeout: monitorTimeout,
959
+ ...(monitorEnabled && !deliverTo ? { skipped: true, reason: 'no --deliver-to target' } : {}),
960
+ } : null,
961
+ message: schedulerWatcherOk
962
+ ? 'Session spawned. Delivery via scheduler (primary) + gateway (secondary).'
963
+ : deliverTo
964
+ ? 'Session spawned. Delivery via gateway only (scheduler watcher failed).'
965
+ : 'Session spawned via gateway. Agent is running.',
966
+ });
967
+
968
+ // -- Post-spawn verification (Fix 3) --------------------------------
969
+ // Canary: poll sessions.json up to 3 times at 10s intervals to confirm the
970
+ // session appeared in the store. Non-fatal -- output is already written above.
971
+ // If the session never shows up, stderr gets a loud warning and ledger status
972
+ // is set to 'spawn-warning'. The watcher provides the definitive error path.
973
+ const SPAWN_POLL_MAX = 3;
974
+ const SPAWN_POLL_DELAY_MS = 10_000;
975
+ let spawnConfirmed = false;
976
+ for (let spawnPoll = 0; spawnPoll < SPAWN_POLL_MAX; spawnPoll++) {
977
+ await sleep(SPAWN_POLL_DELAY_MS);
978
+ const spawnStore = readSessionsStore(agent);
979
+ if (spawnStore && sessionKey in spawnStore) {
980
+ spawnConfirmed = true;
981
+ break;
982
+ }
983
+ }
984
+ if (!spawnConfirmed) {
985
+ process.stderr.write(
986
+ `[${agentBrand}] WARNING: session ${sessionKey} did not appear in gateway after ` +
987
+ `${(SPAWN_POLL_MAX * SPAWN_POLL_DELAY_MS) / 1000}s -- spawn may have failed\n`
988
+ );
989
+ setLabel(label, { status: 'spawn-warning' });
990
+ }
991
+ } catch (err) {
992
+ die(`gateway agent call failed: ${err.message}`);
993
+ }
994
+ }
995
+
996
/**
 * status -- report the ledger entry for a single label.
 *
 * When the entry is still marked "running", the agent's sessions store is
 * consulted first and the entry may be auto-resolved (to "error" or
 * "interrupted") before the report is emitted through out().
 *
 * Flags:
 *   --label <string>   Required
 */
function cmdStatus(flags) {
  const { label } = flags;
  if (!label) die('--label is required', 2);

  const entry = getLabel(label);
  if (!entry) {
    out({ ok: true, label, found: false, message: 'No session found for this label' });
    return;
  }

  // State checks read the sessions store for the owning agent instead of
  // calling a sessions_list API.
  const statusAgent = entry.agent || agentFromSessionKey(entry.sessionKey) || 'main';
  const sessionsStore = readSessionsStore(statusAgent);

  let syncAction = null;

  if (entry.status === 'running' && entry.sessionKey) {
    const spawnedAtMs = entry.spawnedAt ? new Date(entry.spawnedAt).getTime() : 0;
    const ageMs = Date.now() - spawnedAtMs;
    const startupGraceMs = config.startupGraceMs ?? 300_000;

    // Heartbeat-based liveness guard.
    //   pingStaleMs     -- a lastPing older than 3 minutes no longer protects the session.
    //   idleThresholdMs -- max(job timeout, 10 min): idle time tolerated before resolving.
    //   hardCeilingMs   -- 1.5x the larger of timeout/idle threshold; past it a fresh
    //                      ping is ignored and a short 2-minute threshold is used.
    const pingStaleMs = 3 * 60 * 1000;
    const timeoutMs = (entry.timeoutSeconds || 600) * 1000;
    const idleThresholdMs = Math.max(timeoutMs, 10 * 60 * 1000);
    const hardCeilingMs = Math.max(timeoutMs * 1.5, idleThresholdMs * 1.5);

    // A fresh heartbeat only counts while the session is under the hard ceiling.
    const heartbeatFresh = Boolean(entry.lastPing)
      && Date.now() - new Date(entry.lastPing).getTime() < pingStaleMs
      && ageMs < hardCeilingMs;

    // Never auto-resolve inside the startup grace window or while the heartbeat
    // is fresh; otherwise ask the sessions store (this also covers entries with
    // no lastPing at all).
    const check = (ageMs < startupGraceMs || heartbeatFresh)
      ? { shouldResolve: false }
      : checkSessionDone(
        entry.sessionKey,
        sessionsStore,
        ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs,
        true,
        spawnedAtMs,
      );

    if (check.shouldResolve) {
      const ledgerPatch = check.is529
        ? {
          status: 'error',
          error: check.errorMsg || `529/overload: ${check.reason}`,
          summary: `Auto-resolved as error: ${check.reason}`,
        }
        : {
          status: 'interrupted',
          summary: `Auto-resolved: session went idle without calling done. Work may be incomplete. (${check.reason})`,
        };
      setLabel(label, ledgerPatch);
      syncAction = check.is529
        ? `auto-resolved as 529 error: ${check.reason}`
        : `auto-resolved as interrupted: ${check.reason}`;
      // The session has been resolved, so its watchdog is no longer needed.
      disarmWatchdog(label);
    }
  }

  // Liveness snapshot taken from the sessions store.
  let liveness = null;
  if (entry.sessionKey) {
    if (!sessionsStore) {
      liveness = { error: 'sessions store unavailable' };
    } else {
      const live = sessionsStore[entry.sessionKey];
      if (!live) {
        liveness = { error: 'session not found in sessions store' };
      } else {
        let liveAgeMs = null;
        if (live.updatedAt) {
          // updatedAt may be stored as epoch milliseconds or as a date string.
          const updatedMs = typeof live.updatedAt === 'number'
            ? live.updatedAt
            : new Date(live.updatedAt).getTime();
          liveAgeMs = Date.now() - updatedMs;
        }
        liveness = {
          updatedAt: live.updatedAt,
          ageMs: liveAgeMs,
          sessionId: live.sessionId,
          model: live.model || null,
          tokens: live.totalTokens || null,
        };
      }
    }
  }

  // Re-read the ledger: the auto-resolve step above may have just changed it.
  const current = getLabel(label) || entry;

  const report = {
    ok: true,
    label,
    sessionKey: current.sessionKey,
    runId: current.runId,
    agent: current.agent,
    mode: current.mode,
    status: current.status,
    spawnedAt: current.spawnedAt,
    updatedAt: current.updatedAt,
    summary: current.summary || null,
    error: current.error || null,
    liveness,
  };
  if (syncAction) report.syncAction = syncAction;
  out(report);
}
1124
+
1125
+ /**
1126
+ * stuck (implemented by cmdStuck, defined after hasActiveWatcher) -- find sessions running past threshold.
1127
+ * Auto-resolves sessions the gateway considers done before alerting.
1128
+ * Exits 1 only if genuinely stuck sessions remain after sync.
1129
+ *
1130
+ * Flags:
1131
+ * --threshold-min <n> Minutes without activity to consider stuck (default: 15)
1132
+ */
1133
/**
 * Check whether a deliver-watcher job for `label` currently has an active run.
 *
 * Opens the scheduler database read-only (path from SCHEDULER_DB, otherwise
 * ~/.openclaw/scheduler/scheduler.db) and counts runs that are either
 * 'running', or 'pending' with a started_at within the last five minutes,
 * for any job whose name ends in `-deliver:<label>`.
 *
 * Fails open: any error (missing database, SQL failure, ...) yields false.
 *
 * @param {string} label  Dispatch label to look up.
 * @returns {boolean} true when at least one matching run is active.
 */
function hasActiveWatcher(label) {
  let db = null;
  try {
    const dbPath = process.env.SCHEDULER_DB || join(HOME_DIR, '.openclaw', 'scheduler', 'scheduler.db');
    db = new Database(dbPath, { readonly: true, fileMustExist: true });

    // Escape LIKE metacharacters so the label is matched literally. Without
    // this, '_' and '%' inside a label act as wildcards and another label's
    // watcher job can be mistaken for this one.
    const literalLabel = String(label).replace(/[\\%_]/g, '\\$&');

    const row = db.prepare(`
      SELECT COUNT(*) AS c
      FROM jobs j
      JOIN runs r ON r.job_id = j.id
      WHERE j.name LIKE ? ESCAPE '\\'
        AND (
          r.status = 'running'
          OR (r.status = 'pending' AND r.started_at > datetime('now','-5 minutes'))
        )
    `).get(`%-deliver:${literalLabel}`);
    return (row?.c || 0) > 0;
  } catch {
    // Deliberate fail-open: callers treat "cannot tell" as "no active watcher".
    return false;
  } finally {
    // Closing is best-effort; a close failure must not mask the result.
    try { db?.close(); } catch {}
  }
}
1160
+
1161
/**
 * stuck -- find "running" ledger entries that have been silent past a threshold.
 *
 * Entries the sessions store already considers finished are auto-resolved
 * (marked "error" or "interrupted") instead of being reported. The process
 * exits 0 when nothing is stuck and 1 when stuck sessions remain.
 *
 * Flags:
 *   --threshold-min <n>   Minutes without activity to consider stuck (default: 15).
 *                         Must be a non-negative number; anything else is a usage
 *                         error (exit code 2).
 *
 * NOTE(review): the watchdog registration earlier in this file invokes
 * `stuck --label <label> ...`, but this command does not read flags.label and
 * always scans every running label -- confirm whether per-label filtering is intended.
 */
async function cmdStuck(flags) {
  const thresholdMin = Number.parseFloat(flags['threshold-min'] || '15');
  // A NaN threshold would make every comparison below false, so the command
  // would silently report "nothing stuck". Reject it up front instead.
  if (!Number.isFinite(thresholdMin) || thresholdMin < 0) {
    die(`--threshold-min must be a non-negative number (got: ${flags['threshold-min']})`, 2);
  }
  const thresholdMs = thresholdMin * 60 * 1000;

  // Loop-invariant: sessions younger than this are never evaluated.
  const STARTUP_GRACE_MS = config.startupGraceMs ?? 300_000;

  const labels = loadLabels();
  const stuckSessions = [];
  const autoResolved = [];
  const watcherSkipped = [];

  // Sessions stores are read per-agent (cached within this call)
  const sessionsStoreByAgent = {};
  function getSessionsStoreForEntry(e) {
    const ag = e.agent || agentFromSessionKey(e.sessionKey) || 'main';
    if (!(ag in sessionsStoreByAgent)) sessionsStoreByAgent[ag] = readSessionsStore(ag);
    return sessionsStoreByAgent[ag];
  }

  for (const [name, entry] of Object.entries(labels)) {
    if (entry.status !== 'running') continue;

    // Per-job timeout: don't flag until the job's own timeout has elapsed.
    const jobTimeoutMs = entry.timeoutSeconds ? entry.timeoutSeconds * 1000 : 0;
    const effectiveThreshMs = Math.max(jobTimeoutMs, thresholdMs);

    const spawnedAt = entry.spawnedAt ? new Date(entry.spawnedAt).getTime() : 0;
    const ageMs = Date.now() - spawnedAt;

    if (ageMs < effectiveThreshMs) continue;

    // Skip if session is within startup grace period.
    if (ageMs < STARTUP_GRACE_MS) continue;

    // Skip if an active watcher is already monitoring this session.
    if (hasActiveWatcher(name)) {
      watcherSkipped.push({ label: name, reason: 'active dispatch-deliver watcher' });
      continue;
    }

    // Check sessions store state before alerting.
    const stuckSessionsStore = getSessionsStoreForEntry(entry);
    const check = checkSessionDone(entry.sessionKey, stuckSessionsStore, effectiveThreshMs, true, spawnedAt);

    if (check.shouldResolve) {
      // The store says this session is done -- auto-mark and skip the alert.
      if (check.is529) {
        setLabel(name, {
          status: 'error',
          error: check.errorMsg || `529/overload: ${check.reason}`,
          summary: `Auto-resolved as error: ${check.reason}`,
        });
        autoResolved.push({ label: name, reason: `529 error: ${check.reason}` });
      } else {
        setLabel(name, {
          status: 'interrupted',
          summary: `Auto-resolved: session went idle without calling done. Work may be incomplete. (${check.reason})`,
        });
        autoResolved.push({ label: name, reason: check.reason });
      }
      // Disarm watchdog when session is auto-resolved.
      disarmWatchdog(name);
      continue;
    }

    // Session is still active (or the store was unavailable) -- evaluate as potentially stuck.
    const lastActivity = check.lastActivity || spawnedAt;
    const silenceMs = Date.now() - lastActivity;

    if (silenceMs >= effectiveThreshMs) {
      stuckSessions.push({
        label: name,
        sessionKey: entry.sessionKey,
        agent: entry.agent,
        spawnedAt: entry.spawnedAt,
        ageMin: Math.round(ageMs / 60000),
        silenceMin: Math.round(silenceMs / 60000),
        thresholdMin: Math.round(effectiveThreshMs / 60000),
      });
    }
  }

  // Auto-resolved sessions are logged to stderr only, keeping stdout for the report.
  if (autoResolved.length > 0) {
    const lines = autoResolved.map(r => `  [ok] ${r.label}: ${r.reason}`).join('\n');
    process.stderr.write(`[${BRAND}] auto-resolved ${autoResolved.length} completed session(s):\n${lines}\n`);
  }

  if (!stuckSessions.length) {
    out({
      ok: true,
      stuck_count: 0,
      stuck_sessions: [],
      auto_resolved_count: autoResolved.length,
      auto_resolved: autoResolved,
      watcher_skipped: watcherSkipped,
      threshold_min: thresholdMin,
    });
    process.exit(0);
  }

  const lines = stuckSessions.map(s =>
    `* ${s.label} (running ${s.ageMin}min, silent ${s.silenceMin}min)`
  ).join('\n');

  process.stdout.write(
    `⚠️ ${BRAND}: ${stuckSessions.length} stuck session${stuckSessions.length > 1 ? 's' : ''}:\n${lines}\n`
  );

  // Best-effort hook: a failing onStuck must not change the exit status.
  await onStuck(stuckSessions.map(s => ({
    id: s.sessionKey,
    job_name: s.label,
    started_at: s.spawnedAt,
    age_s: s.ageMin * 60,
  }))).catch(() => {});

  process.exit(1);
}
1278
+
1279
/**
 * sync -- reconcile labels.json with sessions store state.
 *
 * Walks every ledger entry still marked "running" and auto-resolves those the
 * sessions store considers done (to "error" for 529/overload, otherwise
 * "interrupted"). Entries inside the startup grace window, or with a fresh
 * lastPing heartbeat under the hard ceiling, are left untouched.
 *
 * Flags:
 *   --dry-run   Show what would change without modifying labels.json
 */
function cmdSync(flags) {
  const dryRun = flags['dry-run'] === true;

  const labels = loadLabels();
  const changes = [];

  // Same startup grace that cmdStatus and cmdStuck apply: a session that was
  // only just spawned must never be auto-resolved.
  const STARTUP_GRACE_MS = config.startupGraceMs ?? 300_000;

  // Sessions stores are read lazily, once per agent.
  const syncStoreByAgent = {};
  function getSyncStore(e) {
    const ag = e.agent || agentFromSessionKey(e.sessionKey) || 'main';
    if (!(ag in syncStoreByAgent)) syncStoreByAgent[ag] = readSessionsStore(ag);
    return syncStoreByAgent[ag];
  }

  for (const [name, entry] of Object.entries(labels)) {
    if (entry.status !== 'running') continue;

    const spawnedAtMs = entry.spawnedAt ? new Date(entry.spawnedAt).getTime() : 0;
    const elapsedMs = Date.now() - spawnedAtMs;

    // Within startup grace -- skip before touching the sessions store.
    if (elapsedMs < STARTUP_GRACE_MS) continue;

    const syncStore = getSyncStore(entry);

    // Heartbeat-based liveness guard (same thresholds as cmdStatus):
    //   3 min ping staleness, idle threshold = max(job timeout, 10 min),
    //   hard ceiling = 1.5x the larger of the two.
    const PING_STALE_MS_SYNC = 3 * 60 * 1000;
    const idleThresholdMsSync = Math.max((entry.timeoutSeconds || 600) * 1000, 10 * 60 * 1000);
    const hardCeilingMsSync = Math.max((entry.timeoutSeconds || 600) * 1000 * 1.5, idleThresholdMsSync * 1.5);

    if (entry.lastPing) {
      const pingAgeMs = Date.now() - new Date(entry.lastPing).getTime();
      if (pingAgeMs < PING_STALE_MS_SYNC && elapsedMs < hardCeilingMsSync) {
        // Heartbeat fresh and under the ceiling -- skip auto-resolve for this cycle.
        continue;
      }
    }

    // Past the hard ceiling a short 2-minute idle threshold applies.
    const syncThresh = elapsedMs >= hardCeilingMsSync ? 2 * 60 * 1000 : idleThresholdMsSync;
    const check = checkSessionDone(entry.sessionKey, syncStore, syncThresh, true, spawnedAtMs);

    if (check.shouldResolve) {
      const newStatus = check.is529 ? 'error' : 'interrupted';
      changes.push({ label: name, from: 'running', to: newStatus, reason: check.reason });
      if (!dryRun) {
        if (check.is529) {
          setLabel(name, {
            status: 'error',
            error: check.errorMsg || `529/overload: ${check.reason}`,
            summary: `Synced as error: ${check.reason}`,
          });
        } else {
          setLabel(name, {
            status: 'interrupted',
            summary: `Auto-resolved: session went idle without calling done. Work may be incomplete. (${check.reason})`,
          });
        }
        // Disarm the watchdog whenever the entry is resolved (error or interrupted).
        disarmWatchdog(name);
      }
    }
  }

  out({
    ok: true,
    dryRun,
    changes: changes.length,
    details: changes,
  });
}
1355
+
1356
/**
 * result -- get the last assistant reply from a session.
 *
 * Looks up the ledger entry for the label, asks the gateway for the session's
 * chat history, and reports the most recent assistant message. The gateway
 * lookup is best-effort: on failure a warning goes to stderr and lastReply
 * is reported as null.
 *
 * Flags:
 *   --label <string>   Required
 */
function cmdResult(flags) {
  const label = flags.label;
  if (!label) die('--label is required', 2);

  const entry = getLabel(label);
  if (!entry) {
    out({ ok: false, label, message: 'No session found for this label' });
    return;
  }

  // Walk the transcript backwards to find the last assistant message.
  let lastReply = null;
  if (entry.sessionKey) {
    try {
      const result = gatewayCall('chat.history', {
        sessionKey: entry.sessionKey,
      }, { timeout: 10000 });

      if (result?.messages?.length) {
        for (let i = result.messages.length - 1; i >= 0; i--) {
          const e = result.messages[i];
          if (e.role === 'assistant' && e.content) {
            // Content may be a plain string, an array of parts (text parts are
            // concatenated, others contribute nothing), or some other value.
            lastReply = typeof e.content === 'string'
              ? e.content
              : Array.isArray(e.content)
                ? e.content.map(c => c.text || '').join('')
                : JSON.stringify(e.content);
            break;
          }
        }
      }
    } catch (err) {
      // Still best-effort, but no longer silent: without this a gateway failure
      // looks exactly like "no reply yet". stdout output is unchanged.
      process.stderr.write(
        `[${BRAND}] warn: chat.history lookup failed for label=${label}: ${err?.message ?? err}\n`,
      );
    }
  }

  // Watchdog cleanup: disarm the watchdog job once a result is available.
  if (lastReply && entry.watchdogJobId) {
    disarmWatchdog(label);
  }

  out({
    ok: true,
    label,
    sessionKey: entry.sessionKey,
    status: entry.status,
    spawnedAt: entry.spawnedAt,
    // Fall back to the first 500 chars of the reply when no summary was stored.
    summary: entry.summary || (lastReply ? lastReply.slice(0, 500) : null),
    lastReply: lastReply || null,
    error: entry.error || null,
  });
}
1412
+
1413
+ /**
1414
+ * done -- agent-side completion signal (push-based).
1415
+ * Called by the subagent itself as its LAST action when fully complete.
1416
+ * Sets labels.json status=done so the watcher resolves immediately.
1417
+ *
1418
+ * Flags:
1419
+ * --label <string> Required. Label to mark as done
1420
+ * --summary <string> Optional. One-line completion summary
1421
+ * --checklist <json> Required. JSON object asserting completion status.
1422
+ * Must include work_complete:true. Optional: tests_passed, pushed.
1423
+ * --sha <sha> Optional (required when task involves git ops). Git commit SHA.
1424
+ * --force-done Override minimum runtime guard (requires --reason).
1425
+ * --reason <string> Required with --force-done. Explains why short runtime is valid.
1426
+ */
1427
+ async function cmdDone(flags) {
1428
+ const label = flags.label;
1429
+ const rawSummary = flags.summary || 'completed (agent signal)';
1430
+ const sha = flags.sha || null;
1431
+ const checklistRaw = flags.checklist || null;
1432
+ const forceDone = !!(flags['force-done']);
1433
+ const forceReason = flags.reason || null;
1434
+ if (!label) die('--label is required', 2);
1435
+
1436
+ // Structural completion checklist -- replaces planning-phrase guard.
1437
+ // Agents must assert completion status explicitly via structured fields.
1438
+ if (!checklistRaw) {
1439
+ die(
1440
+ 'REJECTED: --checklist is required. Pass --checklist with JSON object asserting completion status. ' +
1441
+ "Example: --checklist '{\"work_complete\":true}' " +
1442
+ 'work_complete MUST be true -- you are asserting all assigned work is finished. ' +
1443
+ 'Do NOT call done while planning, reading files, or mid-task.',
1444
+ 1,
1445
+ );
1446
+ }
1447
+
1448
+ let checklist;
1449
+ try {
1450
+ checklist = JSON.parse(checklistRaw);
1451
+ } catch {
1452
+ die("REJECTED: --checklist must be valid JSON. Example: '{\"work_complete\":true}'", 1);
1453
+ }
1454
+
1455
+ if (!checklist.work_complete) {
1456
+ die(
1457
+ 'REJECTED: checklist.work_complete must be true. ' +
1458
+ 'You are asserting all assigned work is done. ' +
1459
+ 'Do NOT call done until all work is complete.',
1460
+ 1,
1461
+ );
1462
+ }
1463
+
1464
+ // Validate optional fields if present -- reject if any are explicitly false
1465
+ const optionalValidated = ['tests_passed', 'pushed'];
1466
+ for (const field of optionalValidated) {
1467
+ if (field in checklist && checklist[field] === false) {
1468
+ die(
1469
+ `REJECTED: checklist.${field} is false. ` +
1470
+ `Do not call done until all required checks pass. ` +
1471
+ `Fix the failing ${field.replace('_', ' ')} before calling done.`,
1472
+ 1,
1473
+ );
1474
+ }
1475
+ }
1476
+
1477
+ // Bug 1 fix: truncate summary to 300 chars (delivery path silently truncates at 500)
1478
+ const MAX_SUMMARY = 300;
1479
+ let summary = rawSummary;
1480
+ if (rawSummary.length > MAX_SUMMARY) {
1481
+ process.stderr.write(
1482
+ `[${BRAND}] warn: --summary truncated from ${rawSummary.length} chars to ${MAX_SUMMARY} chars\n`,
1483
+ );
1484
+ summary = rawSummary.slice(0, MAX_SUMMARY);
1485
+ }
1486
+
1487
+ const existing = getLabel(label);
1488
+
1489
+ // -- Fix 1: Minimum runtime guard ----------------------------------------
1490
+ // Prevent agents from calling done immediately after spawning before doing
1491
+ // any real work. Threshold scales with the task's configured timeout.
1492
+ if (existing) {
1493
+ const spawnedAtMs = existing.spawnedAt ? new Date(existing.spawnedAt).getTime() : null;
1494
+ if (spawnedAtMs !== null) {
1495
+ const elapsedMs = Date.now() - spawnedAtMs;
1496
+ // Fix 4: Use stored timeout from label entry; fall back to timeoutSeconds, then 300.
1497
+ const taskTimeout = Number(existing.timeout ?? existing.timeoutSeconds) || 300;
1498
+ const thresholdMs = taskTimeout > 600 ? 120_000 : 60_000;
1499
+
1500
+ if (elapsedMs < thresholdMs) {
1501
+ if (!forceDone) {
1502
+ const elapsedS = Math.round(elapsedMs / 1000);
1503
+ die(
1504
+ `REJECTED: Session ran for only ${elapsedS}s -- suspiciously short for this task scope. ` +
1505
+ `If work is genuinely complete, re-run with --force-done --reason "explanation".`,
1506
+ 1,
1507
+ );
1508
+ }
1509
+ // --force-done present -- require --reason
1510
+ if (!forceReason || !forceReason.trim()) {
1511
+ die(
1512
+ 'REJECTED: --force-done requires --reason explaining why short runtime is valid.',
1513
+ 1,
1514
+ );
1515
+ }
1516
+ // Log warning for audit trail
1517
+ process.stderr.write(
1518
+ `[${BRAND}] warn: force-done used for label=${label} after ${Math.round(elapsedMs / 1000)}s, reason=${forceReason}\n`,
1519
+ );
1520
+ }
1521
+ }
1522
+ }
1523
+
1524
+ // -- Fix 2: SHA required when task involves git operations ----------------
1525
+ // If the stored task prompt references git operations, --sha is mandatory.
1526
+ // Fix 1 (edge case): old labels enqueued before 6dfa458 have no taskPrompt stored.
1527
+ // When taskPrompt is absent, skip the git-SHA check to avoid breaking existing labels,
1528
+ // but log a warning so operators know the guard was bypassed.
1529
+ // Fix 2 (edge case): tightened regex uses word boundaries so prose mentions like
1530
+ // "do NOT use git push" do NOT trigger the gate; only actual commands do.
1531
+ if (existing) {
1532
+ const taskPrompt = existing.taskPrompt;
1533
+ if (!taskPrompt) {
1534
+ // taskPrompt absent -- label enqueued before guard was added; skip check but warn.
1535
+ process.stderr.write(
1536
+ `[${BRAND}] warn: taskPrompt not stored for label=${label} (enqueued before guard), skipping git-SHA check\n`,
1537
+ );
1538
+ } else {
1539
+ if (taskRequiresGitSha(taskPrompt) && !sha) {
1540
+ die(
1541
+ 'REJECTED: Task involves git commits but --sha was not provided. ' +
1542
+ 'Pass --sha with the actual HEAD SHA of your pushed branch.',
1543
+ 1,
1544
+ );
1545
+ }
1546
+ }
1547
+ }
1548
+
1549
+ // Validate --sha if provided
1550
+ if (sha) {
1551
+ // Sanitize: must be a valid git SHA (7-40 hex chars)
1552
+ if (!/^[0-9a-f]{7,40}$/i.test(sha)) {
1553
+ die(`REJECTED: --sha "${sha}" is not a valid git SHA (must be 7-40 hex characters). Pass the actual commit SHA.`, 1);
1554
+ }
1555
+ // Verify the commit exists in the local git environment
1556
+ try {
1557
+ execFileSync('git', ['cat-file', '-e', sha + '^{commit}'], { stdio: 'pipe' });
1558
+ } catch {
1559
+ die(`REJECTED: SHA ${sha} not found in local git. Push your commits before calling done.`, 1);
1560
+ }
1561
+ }
1562
+
1563
+ // -- Fix 3: Session activity check ----------------------------------------
1564
+ // A session that was spawned 2h ago but did nothing (e.g. immediately called done)
1565
+ // would pass the wall-clock guard. Check message count via the gateway sessions API
1566
+ // to catch idle sessions regardless of wall-clock age.
1567
+ // Escape hatches: --force-done (already accepted above) or --skip-activity-check.
1568
+ if (existing && existing.sessionKey && !flags['skip-activity-check'] && !forceDone) {
1569
+ try {
1570
+ const sessionInfoRes = await fetch(
1571
+ `${GATEWAY_URL}/sessions/${existing.sessionKey}`,
1572
+ {
1573
+ headers: { Authorization: `Bearer ${GATEWAY_TOKEN}` },
1574
+ signal: AbortSignal.timeout(5000),
1575
+ }
1576
+ );
1577
+ if (sessionInfoRes.ok) {
1578
+ const sessionInfo = await sessionInfoRes.json().catch(() => null);
1579
+ const msgCount = sessionInfo?.messageCount ?? sessionInfo?.messages?.length ?? null;
1580
+ if (msgCount !== null && msgCount <= 2) {
1581
+ die(
1582
+ `REJECTED: Session has only ${msgCount} messages -- likely did not complete the assigned work. ` +
1583
+ `Use --force-done --reason if work is genuinely complete, or --skip-activity-check to bypass this check.`,
1584
+ 1,
1585
+ );
1586
+ }
1587
+ }
1588
+ // Non-2xx (session not found, etc.) -> skip check gracefully
1589
+ } catch (activityErr) {
1590
+ // Gateway API unavailable or timed out -- skip check, log warning, do NOT fail.
1591
+ process.stderr.write(
1592
+ `[${BRAND}] warn: session activity check unavailable for label=${label}: ${activityErr.message} -- skipping check\n`,
1593
+ );
1594
+ }
1595
+ }
1596
+
1597
+ if (!existing) {
1598
+ // Label was never registered (e.g. direct subagent spawn, not via enqueue).
1599
+ // This is not an error -- the work completed, the label just wasn't tracked.
1600
+ process.stderr.write(`[${BRAND}] warn: no session found for label "${label}" -- registering as done\n`);
1601
+ setLabel(label, { status: 'done', summary, ...(sha ? { sha } : {}) });
1602
+
1603
+ // No watcher is polling for this label, so actively notify via the gateway
1604
+ // post office using delivery config from config.json as fallback target.
1605
+ const deliverTo = config.deliverTo ?? null;
1606
+ const deliveryChannel = config.deliveryChannel ?? null;
1607
+
1608
+ if (deliverTo) {
1609
+ await onFinished({
1610
+ label,
1611
+ job_id: null,
1612
+ run_id: null,
1613
+ agent: 'main',
1614
+ status: 'ok',
1615
+ duration_ms: 0,
1616
+ session_key: null,
1617
+ summary,
1618
+ deliverTo,
1619
+ deliveryChannel,
1620
+ }).catch(() => {});
1621
+ } else {
1622
+ process.stderr.write(`[${BRAND}] warn: no deliverTo in config -- completion not delivered for "${label}"\n`);
1623
+ }
1624
+
1625
+ out({ ok: true, label, status: 'done', summary, message: 'Label not previously registered; marked done.' });
1626
+ return;
1627
+ }
1628
+
1629
+ setLabel(label, {
1630
+ status: 'done',
1631
+ summary,
1632
+ ...(sha ? { sha } : {}),
1633
+ });
1634
+
1635
+ // Disarm watchdog when agent signals done
1636
+ disarmWatchdog(label);
1637
+
1638
+ // Fire dispatch.finished hook (best-effort)
1639
+ const spawnedAtMs = existing.spawnedAt ? new Date(existing.spawnedAt).getTime() : Date.now();
1640
+ await onFinished({
1641
+ label,
1642
+ job_id: existing.runId || null,
1643
+ run_id: existing.runId || null,
1644
+ agent: existing.agent || 'main',
1645
+ status: 'ok',
1646
+ duration_ms: Date.now() - spawnedAtMs,
1647
+ session_key: existing.sessionKey || null,
1648
+ }).catch(() => {});
1649
+
1650
+ out({ ok: true, label, status: 'done', summary, message: 'Label marked done via agent signal.' });
1651
+ }
1652
+
1653
/**
 * send / steer -- deliver a message into an already-running session.
 *
 * The target session is taken from --session-key when given; otherwise it is
 * resolved from the label registry via getLabel(). On success a JSON result
 * describing the sent message is emitted through out(); any failure raised
 * while calling the gateway (or emitting the result) is reported through die().
 *
 * Flags:
 *   --label <string>       Required (unless --session-key)
 *   --message <string>     Required. Message to send
 *   --session-key <key>    Optional. Direct session key (bypasses label lookup)
 *
 * @param {object} flags - Parsed CLI flags.
 */
async function cmdSend(flags) {
  const { label, message, 'session-key': explicitKey } = flags;

  // Usage errors exit with code 2.
  if (!message) die('--message is required', 2);
  if (!(label || explicitKey)) die('--label or --session-key is required', 2);

  // An explicit key wins; fall back to the key recorded for the label.
  let sessionKey = explicitKey;
  if (!sessionKey) {
    const tracked = getLabel(label);
    if (!tracked?.sessionKey) die(`No session found for label "${label}"`);
    sessionKey = tracked.sessionKey;
  }

  // Fresh idempotency key per invocation; it doubles as the reported runId
  // when the gateway response does not carry one.
  const idempotencyKey = randomUUID();
  const request = {
    message,
    sessionKey,
    idempotencyKey,
    deliver: false,
    lane: 'nested',
  };

  try {
    const reply = gatewayCall('agent', request, { timeout: 15000 });
    const result = {
      ok: true,
      label: label || null,
      sessionKey,
      runId: reply?.runId || idempotencyKey,
      status: 'sent',
      message: 'Message sent to session.',
    };
    out(result);
  } catch (err) {
    die(`Failed to send message: ${err.message}`);
  }
}
1699
+
1700
/**
 * heartbeat -- check session liveness.
 *
 * Resolves a session key (directly from --session-key, or via the label
 * registry), reads the sessions store for the owning agent, and emits a JSON
 * report through out(). A session counts as alive when its `updatedAt`
 * timestamp is less than 10 minutes old.
 *
 * A missing or unparseable `updatedAt` is reported as `ageMs: null`,
 * `updatedAt: null`, `alive: false` instead of throwing.
 *
 * Flags:
 *   --label <string>       Check session for this label
 *   --session-key <key>    Or check directly by key
 *
 * @param {object} flags - Parsed CLI flags.
 */
function cmdHeartbeat(flags) {
  const label = flags.label;
  const directKey = flags['session-key'];

  if (!label && !directKey) die('--label or --session-key is required', 2);

  // Look the label up once; the entry serves both key resolution and agent selection.
  const entry = label ? getLabel(label) : null;

  let sessionKey = directKey;
  if (!sessionKey) {
    if (!entry?.sessionKey) die(`No session found for label "${label}"`);
    sessionKey = entry.sessionKey;
  }

  // Prefer the agent recorded on the label; otherwise derive it from the key.
  const hbAgent = entry?.agent || agentFromSessionKey(sessionKey);
  const hbStore = readSessionsStore(hbAgent || 'main');

  if (!hbStore) {
    out({ ok: false, sessionKey, alive: false, message: 'Sessions store unavailable' });
    return;
  }

  const sessionEntry = hbStore[sessionKey];
  if (!sessionEntry) {
    out({ ok: false, sessionKey, alive: false, message: 'Session not found in sessions store' });
    return;
  }

  // updatedAt may be an epoch-ms number or a date string. Validate it before
  // use: toISOString() throws a RangeError on an invalid Date, and an invalid
  // timestamp would otherwise turn ageMs into NaN.
  const rawUpdatedAt = sessionEntry.updatedAt;
  let ageMs = null;
  let updatedAtIso = null;
  if (rawUpdatedAt) {
    const updatedDate = new Date(rawUpdatedAt);
    if (!Number.isNaN(updatedDate.getTime())) {
      const updatedMs = typeof rawUpdatedAt === 'number' ? rawUpdatedAt : updatedDate.getTime();
      ageMs = Date.now() - updatedMs;
      updatedAtIso = updatedDate.toISOString();
    }
  }

  out({
    ok: true,
    sessionKey,
    label: label || null,
    alive: ageMs !== null && ageMs < 10 * 60 * 1000,
    ageMs,
    updatedAt: updatedAtIso,
    sessionId: sessionEntry.sessionId,
    model: sessionEntry.model || null,
  });
}
1749
+
1750
/**
 * list -- list all tracked labels and their sessions.
 *
 * Loads the label registry, optionally filters by status, sorts newest-first
 * by `updatedAt`, truncates to the requested limit, and emits the result
 * through out().
 *
 * An invalid --limit (non-numeric, negative, or given without a value) is
 * rejected as a usage error (exit code 2) rather than silently producing an
 * empty or truncated list.
 *
 * Flags:
 *   --status <status>   Filter by status (running|done|error)
 *   --limit <n>         Max entries (default: 20)
 *
 * @param {object} flags - Parsed CLI flags.
 */
function cmdList(flags) {
  const filterStatus = flags.status || null;

  // parseInt keeps the original lenient parsing ("10" -> 10); NaN and negative
  // values would make slice() misbehave, so reject them explicitly.
  const limit = Number.parseInt(String(flags.limit || '20'), 10);
  if (Number.isNaN(limit) || limit < 0) {
    die('--limit must be a non-negative integer', 2);
  }

  // Entries with a missing or unparseable updatedAt sort as oldest (0) so the
  // comparator never returns NaN.
  const toMillis = (value) => {
    if (!value) return 0;
    const ms = new Date(value).getTime();
    return Number.isNaN(ms) ? 0 : ms;
  };

  const labels = loadLabels();
  let entries = Object.entries(labels).map(([name, data]) => ({
    label: name,
    ...data,
  }));

  if (filterStatus) {
    entries = entries.filter((e) => e.status === filterStatus);
  }

  // Most recently updated first.
  entries.sort((a, b) => toMillis(b.updatedAt) - toMillis(a.updatedAt));

  entries = entries.slice(0, limit);

  out({ ok: true, count: entries.length, labels: entries });
}
1781
+
1782
+ // -- Usage ----------------------------------------------------
1783
+
1784
/**
 * Print the CLI help text to stdout.
 *
 * The `done` entry lists --checklist because cmdDone rejects the call unless
 * it receives valid JSON with `work_complete: true`; it also lists the
 * optional --sha, --force-done/--reason and --skip-activity-check flags that
 * cmdDone honours.
 */
function usage() {
  process.stdout.write(`
${BRAND} -- sub-agent dispatch CLI (native gateway API)

Usage: openclaw-scheduler <subcommand> [flags]

Subcommands:
enqueue --label <l> --message <m>|--message-file <f> [--agent <a>] [--thinking <t>]
[--timeout <s>] [--mode fresh|reuse] [--model <m>]
[--origin <o>] (auto-detected from active session; override with e.g. "telegram:<your-group-id>")
[--deliver-to <id>] [--deliver-channel <ch>] [--delivery-mode <m>]
(--deliver-to defaults to origin chat ID when --origin is "telegram:<id>")
[--no-monitor] [--monitor-interval <cron>] [--monitor-timeout <min>]
[--verify-cmd <shell_cmd>]

status --label <l>

stuck [--threshold-min <n>] (exits 1 if stuck sessions found)

result --label <l>

send --label <l> --message <m> [--session-key <k>]

steer --label <l> --message <m> (alias for send)

heartbeat --label <l> OR --session-key <k>

list [--status running|done|error] [--limit <n>]

sync [--dry-run] (reconcile labels.json with sessions store)

done --label <l> --checklist <json> [--summary <s>] [--sha <commit>]
[--force-done --reason <text>] [--skip-activity-check]
(agent-side completion signal; marks label as done)
(--checklist must be JSON with "work_complete": true, e.g. '{"work_complete":true}')
`);
}
1818
+
1819
+ // -- Main -----------------------------------------------------
1820
+
1821
// argv[0] is the node binary and argv[1] the script path; the subcommand and
// its flags follow.
const [subcommand, ...rest] = process.argv.slice(2);
const flags = parseFlags(rest);

// Subcommand name -> [handler, awaited]. The boolean records whether the
// handler's result is awaited before the module finishes evaluating.
const SUBCOMMAND_ROUTES = new Map([
  ['enqueue', [cmdEnqueue, true]],
  ['status', [cmdStatus, false]],
  ['stuck', [cmdStuck, true]],
  ['result', [cmdResult, false]],
  ['send', [cmdSend, true]],
  ['steer', [cmdSend, true]], // alias for send
  ['heartbeat', [cmdHeartbeat, false]],
  ['list', [cmdList, false]],
  ['sync', [cmdSync, false]],
  ['done', [cmdDone, true]],
]);

const route = SUBCOMMAND_ROUTES.get(subcommand);
if (route === undefined) {
  // Unknown or missing subcommand: show help and exit with the usage-error code.
  usage();
  process.exit(2);
} else {
  const [handler, awaited] = route;
  if (awaited) {
    await handler(flags);
  } else {
    handler(flags);
  }
}