openclaw-scheduler 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/AGENTS.md +302 -0
  2. package/BEST-PRACTICES.md +506 -0
  3. package/CHANGELOG.md +82 -0
  4. package/CODE_OF_CONDUCT.md +22 -0
  5. package/CONTEXT.md +26 -0
  6. package/CONTRIBUTING.md +73 -0
  7. package/IMPLEMENTATION_SPEC.md +170 -0
  8. package/INSTALL-ADDITIONAL-HOST.md +333 -0
  9. package/INSTALL-LINUX.md +419 -0
  10. package/INSTALL-WINDOWS.md +305 -0
  11. package/INSTALL.md +364 -0
  12. package/JOB-QUICK-REF.md +222 -0
  13. package/LICENSE +21 -0
  14. package/QUICK-START.md +256 -0
  15. package/README.md +2170 -0
  16. package/SECURITY.md +34 -0
  17. package/UNINSTALL.md +129 -0
  18. package/UPGRADING.md +436 -0
  19. package/agents.js +67 -0
  20. package/approval.js +107 -0
  21. package/backup.js +390 -0
  22. package/bin/openclaw-scheduler.js +138 -0
  23. package/cli.js +1083 -0
  24. package/db.js +122 -0
  25. package/dispatch/529-recovery.mjs +204 -0
  26. package/dispatch/README.md +372 -0
  27. package/dispatch/config.example.json +24 -0
  28. package/dispatch/deliver-watcher.sh +57 -0
  29. package/dispatch/hooks.mjs +171 -0
  30. package/dispatch/index.mjs +1836 -0
  31. package/dispatch/watcher.mjs +1396 -0
  32. package/dispatch-queue.js +112 -0
  33. package/dispatcher-approvals.js +96 -0
  34. package/dispatcher-delivery.js +43 -0
  35. package/dispatcher-maintenance.js +242 -0
  36. package/dispatcher-shell.js +29 -0
  37. package/dispatcher-strategies.js +1280 -0
  38. package/dispatcher-utils.js +81 -0
  39. package/dispatcher.js +855 -0
  40. package/docs/adr-schedule-ownership.md +73 -0
  41. package/docs/gateway-contract.md +904 -0
  42. package/docs/plans/2026-03-09-fix-typescript-types.md +91 -0
  43. package/docs/plans/2026-03-09-test-coverage-gaps.md +83 -0
  44. package/docs/plans/2026-03-10-dispatcher-refactor.md +801 -0
  45. package/docs/trust-architecture.md +266 -0
  46. package/gateway.js +473 -0
  47. package/idempotency.js +119 -0
  48. package/index.d.ts +864 -0
  49. package/index.js +17 -0
  50. package/jobs.js +1224 -0
  51. package/messages.js +357 -0
  52. package/migrate-consolidate.js +694 -0
  53. package/migrate.js +125 -0
  54. package/package.json +130 -0
  55. package/paths.js +79 -0
  56. package/prompt-context.js +94 -0
  57. package/retrieval.js +176 -0
  58. package/runs.js +270 -0
  59. package/scheduler-schema.js +101 -0
  60. package/schema.sql +480 -0
  61. package/scripts/dispatch-cli-utils.mjs +65 -0
  62. package/scripts/inbox-consumer.mjs +288 -0
  63. package/scripts/stuck-detector.sh +18 -0
  64. package/scripts/stuck-run-detector.mjs +333 -0
  65. package/scripts/telegram-webhook-check.mjs +238 -0
  66. package/setup.mjs +724 -0
  67. package/shell-result.js +214 -0
  68. package/task-tracker.js +300 -0
  69. package/team-adapter.js +335 -0
  70. package/v02-runtime.js +599 -0
package/db.js ADDED
@@ -0,0 +1,122 @@
1
+ // Database layer -- SQLite via better-sqlite3
2
+ import Database from 'better-sqlite3';
3
+ import { readFileSync } from 'fs';
4
+ import { join, dirname } from 'path';
5
+ import { fileURLToPath } from 'url';
6
+ import { ensureSchedulerDbParent, resolveSchedulerDbPath } from './paths.js';
7
+
8
+ const __dirname = dirname(fileURLToPath(import.meta.url));
9
+
10
+ let _db;
11
+ let _dbPath;
12
+
13
/**
 * Point this module at a different database location.
 *
 * Intended to be called before getDb()/initDb(). If a connection is already
 * open it is closed and dropped, so the next getDb() call opens a new one.
 * A falsy value makes getDb() fall back to resolveSchedulerDbPath().
 *
 * @param {string} path - Database file path, or ':memory:' for an in-memory
 *   database (useful in tests).
 */
export function setDbPath(path) {
  if (_db) {
    _db.close();
    _db = null;
  }
  _dbPath = path;
}
21
+
22
/**
 * Return the module-wide database connection, opening it on first use.
 *
 * The path is the one given to setDbPath(), otherwise whatever
 * resolveSchedulerDbPath() derives from process.env. File-backed databases
 * get their parent directory ensured and WAL journaling enabled; every
 * connection gets a 5000 ms busy timeout and foreign-key enforcement.
 *
 * @returns {object} The open better-sqlite3 Database instance.
 */
export function getDb() {
  if (_db) return _db;

  const dbPath = _dbPath || resolveSchedulerDbPath({ env: process.env });
  const isInMemory = dbPath === ':memory:';

  if (!isInMemory) ensureSchedulerDbParent(dbPath);
  _db = new Database(dbPath);

  // WAL is only requested for file-backed databases.
  if (!isInMemory) _db.pragma('journal_mode = WAL');
  _db.pragma('busy_timeout = 5000');
  _db.pragma('foreign_keys = ON');

  return _db;
}
33
+
34
/**
 * Report the database path getDb() would use, without opening a connection.
 *
 * @returns {string} The setDbPath() override when one is set, otherwise the
 *   path resolveSchedulerDbPath() derives from process.env.
 */
export function getResolvedDbPath() {
  if (_dbPath) return _dbPath;
  return resolveSchedulerDbPath({ env: process.env });
}
37
+
38
/**
 * Open the database (via getDb) and bring its schema up to date.
 *
 * - Database already has tables: run the consolidation migration, then apply
 *   schema.sql once.
 * - Empty database: apply schema.sql, run the consolidation migration, then
 *   apply schema.sql again.
 *
 * Errors raised while applying the schema or running consolidation are
 * written to stderr and swallowed; they do not reject the returned promise.
 *
 * @returns {Promise<object>} The database handle returned by getDb().
 */
export async function initDb() {
  const db = getDb();
  const schemaSql = readFileSync(join(__dirname, 'schema.sql'), 'utf8');

  // One timestamped diagnostic line on stderr.
  const logLine = (text) => {
    process.stderr.write(`${new Date().toISOString()} [db] ${text}\n`);
  };

  const tableCountRow = db.prepare(`
    SELECT COUNT(*) AS cnt
    FROM sqlite_master
    WHERE type = 'table'
      AND name NOT LIKE 'sqlite_%'
  `).get();
  const isExistingInstall = (tableCountRow?.cnt ?? 0) > 0;

  // Runs schema.sql; returns whether it succeeded. Failures are only logged.
  const applySchema = (label) => {
    try {
      db.exec(schemaSql);
    } catch (err) {
      logLine(`${label}: ${err.message}`);
      return false;
    }
    return true;
  };

  // Loads and runs the consolidation migration; failures are only logged.
  const runConsolidate = async () => {
    try {
      const { default: consolidate } = await import('./migrate-consolidate.js');
      // NOTE(review): the result is not awaited -- confirm consolidate() is synchronous.
      const applied = consolidate();
      if (applied) logLine('Consolidation migration applied');
    } catch (err) {
      logLine(`migrate-consolidate error: ${err.message}`);
    }
  };

  // Net-new installs need the baseline schema before consolidation. Existing
  // installs skip this so the migration can normalize legacy partial
  // tables/indexes before the schema is re-applied.
  if (!isExistingInstall) {
    applySchema('Initial schema apply warning');
  }

  await runConsolidate();

  // Apply the schema after consolidation so indexes/table definitions are
  // aligned with whatever the migration produced.
  applySchema(isExistingInstall ? 'Schema apply warning' : 'Schema re-apply warning');

  return db;
}
86
+
87
/**
 * Run a PASSIVE WAL checkpoint on the open connection. Meant to be called
 * periodically to shrink the data-loss window on crash/SIGKILL.
 *
 * @returns {object|null} The first row reported by the checkpoint pragma, or
 *   null when no connection is open, the pragma yields no row, or it throws
 *   (the error is written to stderr).
 */
export function checkpointWal() {
  if (!_db) return null;

  let rows;
  try {
    rows = _db.pragma('wal_checkpoint(PASSIVE)');
  } catch (err) {
    const ts = new Date().toISOString();
    process.stderr.write(`${ts} [db] WAL checkpoint error: ${err.message}\n`);
    return null;
  }
  return rows?.[0] || null;
}
102
+
103
/**
 * Close the module-wide connection, if one is open.
 *
 * A TRUNCATE WAL checkpoint is attempted first and its outcome is written to
 * stderr; a failing checkpoint is logged but does not prevent the close.
 */
export function closeDb() {
  if (!_db) return;

  let outcome;
  try {
    // Checkpoint WAL into the main DB file before closing to prevent data loss.
    const rows = _db.pragma('wal_checkpoint(TRUNCATE)');
    const stats = rows && rows[0];
    outcome = stats
      ? `WAL checkpoint on close: busy=${stats.busy}, checkpointed=${stats.checkpointed}, log=${stats.log}`
      : 'WAL checkpoint on close: ok';
  } catch (err) {
    outcome = `WAL checkpoint failed on close: ${err.message}`;
  }
  process.stderr.write(`${new Date().toISOString()} [db] ${outcome}\n`);

  _db.close();
  _db = null;
}
@@ -0,0 +1,204 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * dispatch 529 recovery -- scheduler safety net for 529/overload errors.
4
+ *
5
+ * Scans labels.json for sessions in 'error' state with 529/overload patterns.
6
+ * If retryCount < MAX_RETRIES and the watcher hasn't already handled it,
7
+ * re-enqueues the session.
8
+ *
9
+ * Idempotency:
10
+ * - Checks retryCount + updatedAt to avoid double-retrying if the watcher
11
+ * already handled it (watcher updates retryCount and status immediately).
12
+ * - If status is already 'running', skip (watcher handled it).
13
+ * - If retryCount >= MAX, skip (already exhausted).
14
+ *
15
+ * Run by scheduler every 10 minutes as a safety net.
16
+ *
17
+ * Exit codes:
18
+ * 0 -- all good (nothing to retry, or retries dispatched)
19
+ * 1 -- error
20
+ */
21
+
22
+ import { readFileSync, writeFileSync, renameSync } from 'fs';
23
+ import { execFileSync } from 'child_process';
24
+ import { dirname, join } from 'path';
25
+ import { fileURLToPath } from 'url';
26
+
27
+ const __dirname = dirname(fileURLToPath(import.meta.url));
28
+ const LABELS_PATH = process.env.DISPATCH_LABELS_PATH || join(__dirname, 'labels.json');
29
+ const INDEX_PATH = process.env.DISPATCH_INDEX_PATH || join(__dirname, 'index.mjs');
30
+
31
+ const MAX_RETRIES = 3;
32
+ // Only recover errors that happened within the last 60 minutes
33
+ // (don't revive ancient failures)
34
+ const MAX_ERROR_AGE_MS = 60 * 60 * 1000;
35
+ // Minimum time since the entry's last update (updatedAt) before the safety net triggers
36
+ // (give the watcher time to handle it first -- 5 minutes)
37
+ const MIN_SINCE_LAST_UPDATE_MS = 5 * 60 * 1000;
38
+
39
// Patterns that mark an error message as a transient overload failure.
const OVERLOAD_PATTERNS = [
  // Match "529" only as a standalone token. A bare /529/ also fires on
  // unrelated digit runs and identifiers (port 15290, "5290ms", UUID
  // fragments such as "9131529b"), which would trigger spurious retries.
  /(?<![0-9a-z])529(?![0-9a-z])/i,
  /failover\s*error/i,
  /overload/i,
  /temporarily\s+overloaded/i,
  /service.*overloaded/i,
];

/**
 * Decide whether an error message describes a 529/overload condition.
 *
 * @param {unknown} errorMsg - Error text recorded for a session.
 * @returns {boolean} true when errorMsg is a non-empty string matching one of
 *   OVERLOAD_PATTERNS; false for empty or non-string values.
 */
function is529Error(errorMsg) {
  if (typeof errorMsg !== 'string' || errorMsg === '') return false;
  return OVERLOAD_PATTERNS.some((pattern) => pattern.test(errorMsg));
}
51
+
52
/**
 * Read the labels ledger from LABELS_PATH.
 *
 * Never throws. A missing file yields an empty ledger silently; any other
 * read failure, invalid JSON, or a top-level value that is not a plain JSON
 * object is reported on stderr and also yields an empty ledger. Callers can
 * therefore always iterate and index the result.
 *
 * @returns {object} Parsed label -> entry map, or {} when unavailable.
 */
function loadLabels() {
  let raw;
  try {
    raw = readFileSync(LABELS_PATH, 'utf-8');
  } catch (err) {
    // A missing ledger just means nothing has been enqueued yet.
    if (err?.code !== 'ENOENT') {
      process.stderr.write(`[529-recovery] cannot read ${LABELS_PATH}: ${err.message}\n`);
    }
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    process.stderr.write(`[529-recovery] ${LABELS_PATH} is not a JSON object; ignoring\n`);
  } catch (err) {
    process.stderr.write(`[529-recovery] invalid JSON in ${LABELS_PATH}: ${err.message}\n`);
  }
  return {};
}
59
+
60
/**
 * Persist the labels ledger to LABELS_PATH.
 *
 * The JSON is first written to a sibling temp file (suffixed with this
 * process's pid) and then renamed over LABELS_PATH.
 *
 * @param {object} labels - Full ledger to write.
 */
function saveLabels(labels) {
  const tempPath = `${LABELS_PATH}.tmp.${process.pid}`;
  const serialized = `${JSON.stringify(labels, null, 2)}\n`;
  writeFileSync(tempPath, serialized);
  renameSync(tempPath, LABELS_PATH);
}
65
+
66
/**
 * Best-effort notification: runs `cli.js msg send scheduler main <message>`
 * from the parent directory with a 10 s timeout. Any failure is ignored so a
 * notification problem never interrupts the recovery scan.
 *
 * @param {string} message - Text to send.
 */
function notify(message) {
  const cliPath = join(__dirname, '..', 'cli.js');
  const argv = [cliPath, 'msg', 'send', 'scheduler', 'main', message];
  const options = {
    encoding: 'utf-8',
    timeout: 10000,
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  try {
    execFileSync(process.execPath, argv, options);
  } catch {
    // Deliberately ignored: notifications are best-effort.
  }
}
76
+
77
/**
 * Re-dispatch a session that failed with an overload error.
 *
 * First tries `index.mjs send` for the label; if that command fails, falls
 * back to `index.mjs enqueue --mode fresh`, forwarding the model, thinking,
 * origin and delivery settings found on the ledger entry.
 *
 * @param {string} label - Ledger label of the session.
 * @param {object} [entry] - Ledger entry supplying optional enqueue settings.
 * @returns {'send'|'fresh'|null} Which command succeeded, or null when both failed.
 */
function respawnSession(label, entry) {
  const continuationMsg = `[Auto-retry after 529 overload -- scheduler safety net] This is an automatic retry. Please continue your previous task from where you left off.`;

  // Runs index.mjs with the given arguments; true when it exits cleanly.
  const runIndex = (argv) => {
    try {
      execFileSync(process.execPath, [INDEX_PATH, ...argv], {
        encoding: 'utf-8',
        timeout: 30000,
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      return true;
    } catch {
      return false;
    }
  };

  // Preferred path: message the session recorded for this label.
  if (runIndex(['send', '--label', label, '--message', continuationMsg])) {
    return 'send';
  }

  // Fallback: enqueue a fresh session carrying over the stored settings.
  const enqueueArgs = [
    'enqueue',
    '--label', label,
    '--message', continuationMsg,
    '--mode', 'fresh',
  ];
  if (entry?.model) enqueueArgs.push('--model', entry.model);
  if (entry?.thinking) enqueueArgs.push('--thinking', entry.thinking);
  if (entry?.origin) enqueueArgs.push('--origin', entry.origin);
  if (entry?.deliverTo) {
    // Delivery mode/channel are only forwarded together with a target.
    enqueueArgs.push('--deliver-to', entry.deliverTo);
    if (entry?.deliveryMode) enqueueArgs.push('--delivery-mode', entry.deliveryMode);
    if (entry?.deliverChannel) enqueueArgs.push('--deliver-channel', entry.deliverChannel);
  }

  return runIndex(enqueueArgs) ? 'fresh' : null;
}
121
+
122
+ // -- Main ----------------------------------------------------
123
+
124
// Scan snapshot: used only to pick candidate labels. `?? {}` keeps the scan
// alive if the ledger ever parses to null.
const labels = loadLabels() ?? {};
const results = [];

for (const [name, snapshotEntry] of Object.entries(labels)) {
  // Cheap pre-filter on the snapshot: only error-state sessions whose error
  // text looks like a 529/overload. `?.` tolerates null ledger entries.
  if (snapshotEntry?.status !== 'error') continue;
  if (!is529Error(snapshotEntry.error || '')) continue;

  // Re-read the ledger before deciding anything. Respawns in earlier
  // iterations block for a long time (two 30 s commands plus a 10 s
  // notification), so the snapshot may be stale and something else may have
  // retried or resolved this label in the meantime.
  const freshLabels = loadLabels() ?? {};
  const entry = freshLabels[name];
  if (entry?.status !== 'error' || !is529Error(entry.error || '')) {
    results.push({ label: name, action: 'skip', reason: 'no longer in 529 error state (handled elsewhere)' });
    continue;
  }

  // Check age -- don't retry very old errors. A missing or unparseable
  // timestamp counts as epoch 0 (i.e. "too old"); without the NaN guard an
  // invalid date would slip past both age checks and be retried.
  const now = Date.now();
  const parsedUpdatedAt = entry.updatedAt ? new Date(entry.updatedAt).getTime() : 0;
  const updatedAt = Number.isNaN(parsedUpdatedAt) ? 0 : parsedUpdatedAt;
  const errorAge = now - updatedAt;
  if (errorAge > MAX_ERROR_AGE_MS) {
    results.push({ label: name, action: 'skip', reason: `error too old (${Math.round(errorAge / 60000)}min)` });
    continue;
  }

  // Check if watcher already handled it (updated recently)
  if (errorAge < MIN_SINCE_LAST_UPDATE_MS) {
    results.push({ label: name, action: 'skip', reason: `updated ${Math.round(errorAge / 1000)}s ago (watcher may be handling)` });
    continue;
  }

  // Check retry count (from the fresh entry, so concurrent retries count)
  const retryCount = entry.retryCount || 0;
  if (retryCount >= MAX_RETRIES) {
    results.push({ label: name, action: 'skip', reason: `max retries exhausted (${retryCount}/${MAX_RETRIES})` });
    continue;
  }

  // Attempt retry
  const newRetryCount = retryCount + 1;
  process.stderr.write(`[529-recovery] retrying [${name}] (attempt ${newRetryCount}/${MAX_RETRIES})\n`);

  // Update labels.json first (claim the retry). `entry` belongs to the ledger
  // copy read a moment ago, so the claim is written on top of current data.
  entry.retryCount = newRetryCount;
  entry.updatedAt = new Date().toISOString();
  saveLabels(freshLabels);

  const method = respawnSession(name, entry);
  if (method) {
    // Mark as running. Reload first: the respawn command may itself have
    // rewritten the ledger.
    const updated = loadLabels() ?? {};
    if (updated[name]) {
      updated[name].status = 'running';
      updated[name].error = null;
      updated[name].updatedAt = new Date().toISOString();
      saveLabels(updated);
    }
    notify(`🌶️ Dispatch 529 recovery: [${name}] retried (${newRetryCount}/${MAX_RETRIES}) via ${method}`);
    results.push({ label: name, action: 'retried', method, retryCount: newRetryCount });
  } else {
    notify(`🌶️ Dispatch 529 recovery: [${name}] retry FAILED (${newRetryCount}/${MAX_RETRIES})`);
    results.push({ label: name, action: 'retry_failed', retryCount: newRetryCount });
  }
}

// Output summary -- scheduler delivers stdout if non-empty and delivery_mode=announce
if (results.length > 0) {
  const retried = results.filter(r => r.action === 'retried');
  const skipped = results.filter(r => r.action === 'skip');
  const failed = results.filter(r => r.action === 'retry_failed');

  const lines = [];
  if (retried.length) lines.push(`✅ Retried: ${retried.map(r => r.label).join(', ')}`);
  if (failed.length) lines.push(`❌ Failed: ${failed.map(r => r.label).join(', ')}`);
  if (skipped.length) lines.push(`⏭️ Skipped: ${skipped.map(r => `${r.label} (${r.reason})`).join(', ')}`);

  // Only produce stdout (which triggers delivery) if we actually retried or failed something
  if (retried.length || failed.length) {
    process.stdout.write(`🌶️ 529 Recovery:\n${lines.join('\n')}\n`);
  } else {
    process.stderr.write(`[529-recovery] scan complete: ${skipped.length} skipped\n`);
  }
} else {
  process.stderr.write('[529-recovery] scan complete: no 529 errors found\n');
}
@@ -0,0 +1,372 @@
1
+ # dispatch
2
+
3
+ **Sub-agent dispatch CLI for OpenClaw — native gateway API edition.**
4
+
5
+ dispatch spawns and steers isolated agent sessions directly via the OpenClaw
6
+ Gateway API. It tracks label→session mappings in a local JSON ledger, giving
7
+ you a simple CLI to dispatch work, check on it, steer it mid-run, and get
8
+ results back.
9
+
10
+ No scheduler DB dependency. No dispatcher tick delay. Sessions start instantly.
11
+
12
+ ---
13
+
14
+ ## Files
15
+
16
+ | File | Purpose |
17
+ |---|---|
18
+ | `index.mjs` | CLI entry point — 10 subcommands |
19
+ | `hooks.mjs` | Lifecycle event emitter (Loki + optional HTTP webhook) |
20
+ | `watcher.mjs` | Delivery monitoring process |
21
+ | `529-recovery.mjs` | Transient error recovery |
22
+ | `deliver-watcher.sh` | Shell wrapper for result retrieval |
23
+ | `chilisaus.mjs` | Branded wrapper |
24
+ | `config.example.json` | Example config |
25
+ | `test-done-postoffice.mjs` | Done handler test |
26
+ | `labels.json` | Local label→session ledger (gitignored) |
27
+ | `README.md` | This file |
28
+
29
+ ---
30
+
31
+ ## How it works
32
+
33
+ dispatch calls the OpenClaw Gateway RPC API directly:
34
+
35
+ 1. **`sessions.patch`** — configure the session (model, thinking level, spawn depth)
36
+ 2. **`agent`** — send a message into the session (spawning it if new)
37
+ 3. **`sessions.list`** — query session status and liveness
38
+ 4. **`chat.history`** — read session transcripts for results
39
+
40
+ ```
41
+ Orchestrator calls:
42
+ dispatch enqueue --label ticket-42 --message "Fix the deploy script"
43
+
44
+ → Creates session key: agent:main:subagent:<uuid>
45
+ → Patches session with model/thinking/spawnDepth
46
+ → Calls gateway `agent` method with the task
47
+ → Session starts immediately (no scheduler tick delay)
48
+ → Tracks label→sessionKey in labels.json
49
+ → Agent auto-announces results on completion
50
+ → hooks.mjs fires dispatch.started to Loki
51
+ ```
52
+
53
+ ---
54
+
55
+ ## Subcommands
56
+
57
+ ### `enqueue` — spawn a new session
58
+
59
+ ```bash
60
+ node dispatch/index.mjs enqueue \
61
+ --label "ticket-42" \
62
+ --message "Fix the deploy script" \
63
+ --mode fresh \ # fresh | reuse
64
+ --agent main \
65
+ --model anthropic/claude-sonnet-4-6 \
66
+ --thinking high \
67
+ --timeout 300 \
68
+ --deliver-to YOUR_CHAT_ID \
69
+ --deliver-channel telegram \
70
+ --delivery-mode announce
71
+ ```
72
+
73
+ | Flag | Default | Description |
74
+ |---|---|---|
75
+ | `--label` | required | Human name — used for lookup/reuse |
76
+ | `--message` | required | Prompt sent to the agent |
77
+ | `--mode` | `fresh` | `fresh` = new session; `reuse` = continue last session for this label |
78
+ | `--session-key` | — | Explicit session key (bypasses ledger lookup) |
79
+ | `--agent` | `main` | Agent ID |
80
+ | `--model` | — | Model override (e.g. `anthropic/claude-sonnet-4-6`) |
81
+ | `--thinking` | — | Reasoning level: `low`, `high`, `xhigh` |
82
+ | `--timeout` | `300` | Seconds before run times out |
83
+ | `--deliver-to` | — | Delivery target (chat ID, channel ID, handle, etc.). Enables `deliver:true` on the gateway call |
84
+ | `--deliver-channel` | `telegram` | Delivery channel for `--deliver-to` (telegram, slack, etc.) |
85
+ | `--delivery-mode` | `announce` | `announce`, `announce-always`, `none` |
86
+ | `--origin` | -- | Dispatch origin (e.g. `telegram:12345`) |
87
+ | `--no-monitor` | false | Skip watcher monitoring |
88
+ | `--monitor-interval` | -- | Watcher cron expression |
89
+ | `--monitor-timeout` | -- | Watcher timeout in minutes |
90
+ | `--verify-cmd` | -- | Post-completion verification command |
91
+
92
+ ### `status` — session status for a label
93
+
94
+ ```bash
95
+ node dispatch/index.mjs status --label "ticket-42"
96
+ ```
97
+
98
+ Returns ledger info + live session data from gateway (model, age, token usage).
99
+
100
+ ### `stuck` — find stuck running sessions
101
+
102
+ ```bash
103
+ node dispatch/index.mjs stuck --threshold-min 15
104
+ ```
105
+
106
+ Exit 0 = nothing stuck (silent).
107
+ Exit 1 = stuck sessions found (triggers announce delivery).
108
+
109
+ Checks labels.json for sessions marked `running`, cross-references gateway
110
+ session store for last activity timestamp.
111
+
112
+ ### `result` — last assistant reply from a session
113
+
114
+ ```bash
115
+ node dispatch/index.mjs result --label "ticket-42"
116
+ ```
117
+
118
+ Reads the session transcript via `chat.history` and returns the last assistant
119
+ message.
120
+
121
+ ### `done` — mark a tracked session complete
122
+
123
+ ```bash
124
+ node dispatch/index.mjs done \
125
+ --label "ticket-42" \
126
+ --summary "Work complete" \
127
+ --checklist '{"work_complete":true}'
128
+ ```
129
+
130
+ Marks the label as `done` immediately so the watcher can resolve the run without
131
+ waiting for timeout polling.
132
+
133
+ | Flag | Default | Description |
134
+ |---|---|---|
135
+ | `--label` | required | Label to mark complete |
136
+ | `--summary` | `completed (agent signal)` | One-line completion summary |
137
+ | `--checklist` | required | JSON object. Must include `work_complete:true`; optional fields like `tests_passed` and `pushed` must not be `false` when present |
138
+ | `--sha` | — | Required when the stored task prompt includes real git commands like `git push`, `git rebase`, `git cherry-pick`, `--force-with-lease`, or `--force-push` |
139
+ | `--force-done` | false | Override the minimum-runtime guard for legitimate short tasks |
140
+ | `--reason` | — | Required with `--force-done`; records why an unusually short session is still valid |
141
+ | `--skip-activity-check` | false | Bypass the gateway message-count heuristic when that check is too strict for the task |
142
+
143
+ Notes:
144
+ - The minimum runtime guard rejects very short sessions unless `--force-done --reason ...` is provided.
145
+ - Older labels created before `taskPrompt` storage will warn and skip the git-SHA gate.
146
+ - Gateway activity checks fail open: if the session API is unavailable, `done` logs a warning and continues.
147
+
148
+ ### `send` — message a running session
149
+
150
+ ```bash
151
+ node dispatch/index.mjs send \
152
+ --label "ticket-42" \
153
+ --message "Tests still failing on line 42, focus on the edge case"
154
+ ```
155
+
156
+ Sends a message directly into the running session. The agent sees it as a new
157
+ user turn and continues working. This is the **mid-session steering superpower**.
158
+
159
+ ### `steer` — alias for send
160
+
161
+ ```bash
162
+ node dispatch/index.mjs steer \
163
+ --label "ticket-42" \
164
+ --message "Change approach: use the new API instead"
165
+ ```
166
+
167
+ Identical to `send`. The name makes intent explicit.
168
+
169
+ ### `heartbeat` — check session liveness
170
+
171
+ ```bash
172
+ node dispatch/index.mjs heartbeat --label "ticket-42"
173
+ # or:
174
+ node dispatch/index.mjs heartbeat --session-key "agent:main:subagent:..."
175
+ ```
176
+
177
+ Returns whether the session is alive (updated within the last 10 minutes),
178
+ plus session metadata.
179
+
180
+ ### `list` — list all tracked labels
181
+
182
+ ```bash
183
+ node dispatch/index.mjs list [--status running] [--limit 10]
184
+ ```
185
+
186
+ Shows all labels in the ledger, sorted by most recent. Filter by status.
187
+
188
+ ### `sync` -- reconcile labels with sessions store
189
+
190
+ ```bash
191
+ node dispatch/index.mjs sync
192
+ ```
193
+
194
+ Reconciles `labels.json` with the gateway sessions store. Sessions that no
195
+ longer exist on the gateway are marked stale, and sessions present on the
196
+ gateway but missing from the ledger are imported. Useful after gateway restarts
197
+ or manual session cleanup.
198
+
199
+ ---
200
+
201
+ ## Session Reuse
202
+
203
+ `--mode reuse` looks up the last session key for this label in `labels.json`
204
+ and sends the new message into that existing session. The agent picks up where
205
+ it left off with full conversation history.
206
+
207
+ ```bash
208
+ # First run — fresh session
209
+ node dispatch/index.mjs enqueue --label "daily-report" --message "Generate today's report"
210
+
211
+ # Later — continue in the same session
212
+ node dispatch/index.mjs enqueue --label "daily-report" --message "Add the Q4 numbers" --mode reuse
213
+ ```
214
+
215
+ ---
216
+
217
+ ## Labels Ledger (`labels.json`)
218
+
219
+ Local JSON file mapping labels to session keys:
220
+
221
+ ```json
222
+ {
223
+ "ticket-42": {
224
+ "sessionKey": "agent:main:subagent:9131309b-...",
225
+ "runId": "46030a3d-...",
226
+ "agent": "main",
227
+ "mode": "fresh",
228
+ "model": null,
229
+ "thinking": null,
230
+ "spawnedAt": "2026-03-01T04:27:52.181Z",
231
+ "status": "running",
232
+ "summary": null,
233
+ "error": null,
234
+ "updatedAt": "2026-03-01T04:27:52.182Z"
235
+ }
236
+ }
237
+ ```
238
+
239
+ Gitignored by default. Session-local, not shared.
240
+
241
+ ---
242
+
243
+ ## Lifecycle Hooks (`hooks.mjs`)
244
+
245
+ Fires structured events to Loki and/or an HTTP webhook:
246
+
247
+ | Event | When |
248
+ |---|---|
249
+ | `dispatch.started` | Session spawned |
250
+ | `dispatch.finished` | Session completed |
251
+ | `dispatch.stuck` | `stuck` subcommand found stuck sessions |
252
+
253
+ **Configuration:**
254
+
255
+ ```bash
256
+ export LOKI_PUSH_URL=http://your-loki-host/loki/api/v1/push
257
+ export DISPATCH_WEBHOOK_URL=https://your-endpoint.example.com/hook
258
+ export DISPATCH_HOST=my-agent-host
259
+ ```
260
+
261
+ ---
262
+
263
+ ## Gateway Auth
264
+
265
+ dispatch reads the gateway token from:
266
+ 1. `OPENCLAW_GATEWAY_TOKEN` environment variable
267
+ 2. `~/.openclaw/openclaw.json` → `gateway.auth.token`
268
+
269
+ No manual token configuration needed on a standard OpenClaw install.
270
+
271
+ ---
272
+
273
+ ## Delivery
274
+
275
+ ### How it works
276
+
277
+ When `--deliver-to` is set, dispatch registers a **scheduler watcher job**
278
+ after dispatching the session. The watcher polls the session result every
279
+ minute until the agent produces a reply, then delivers via the scheduler's
280
+ `handleDelivery` pipeline.
281
+
282
+ ```
283
+ dispatch enqueue --deliver-to <telegram-user-id>
284
+ -> gateway agent call (deliver: false, fire-and-forget)
285
+ -> scheduler job: <brand>-deliver:<label> (run_now: true, shell, one-shot)
286
+ -> watcher.mjs: long-running blocking process polls session status
287
+ -> on success (exit 0): scheduler delivers output to telegram/<telegram-user-id>
288
+ -> job auto-prunes via ttl_hours (default 48h)
289
+ ```
290
+
291
+ **Why scheduler instead of gateway `deliver:true`?**
292
+ - Retry / at-least-once delivery guarantee
293
+ - Delivery aliases (scheduler resolves `@team_room` → channel/target)
294
+ - Audit trail (runs table records every attempt)
295
+ - Chain triggers (completion can fire child jobs)
296
+ - Resilient to gateway restarts mid-run
297
+
298
+ ### Watcher script
299
+
300
+ `deliver-watcher.sh` checks the session result. Exit 0 with output = deliver.
301
+ Exit 1 with no output = retry on next cron tick (no spam — `announce-always`
302
+ only delivers when `output.trim()` is truthy).
303
+
304
+ ### Progress check-ins from subagent sessions
305
+
306
+ Subagent sessions run without PATH access to the `openclaw` CLI, so
307
+ `openclaw system event` silently fails. For mid-task progress updates,
308
+ use the gateway HTTP API via curl:
309
+
310
+ ```bash
311
+ GW_TOKEN=$(python3 -c "import json, os; print(json.load(open(os.path.expanduser('~/.openclaw/openclaw.json')))['gateway']['auth']['token'])")
312
+ curl -s -X POST http://127.0.0.1:18789/tools/invoke \
313
+ -H 'Content-Type: application/json' \
314
+ -H "Authorization: Bearer $GW_TOKEN" \
315
+ -d '{"tool":"message","args":{"action":"send","channel":"telegram","target":"<telegram-user-id>","message":"<label>: <progress update>"},"sessionKey":"main"}'
316
+ ```
317
+ ---
318
+
319
+ ## Architecture: Before & After
320
+
321
+ ### Before (scheduler DB dispatch)
322
+ ```
323
+ dispatch enqueue → creates job in scheduler DB → dispatcher picks up on tick
324
+ → runs as isolated session → announces result → hooks fire
325
+ ```
326
+
327
+ ### After (native gateway API)
328
+ ```
329
+ dispatch enqueue → calls gateway API directly → session starts immediately
330
+ → tracks in labels.json → announces result → hooks fire
331
+ ```
332
+
333
+ Key improvements:
334
+ - **Instant dispatch** — no scheduler tick delay (was up to 10s)
335
+ - **Mid-session steering** — `send`/`steer` inject messages into running sessions
336
+ - **No DB dependency** — labels.json is a simple JSON file
337
+ - **Session reuse** — `--mode reuse` continues conversations
338
+ - **Simpler** -- lightweight multi-file CLI vs full DB schema + dispatcher integration
339
+
340
+ ---
341
+
342
+ ## Stuck Run Detector (cron job)
343
+
344
+ ```bash
345
+ openclaw-scheduler jobs add '{
346
+ "name": "Stuck Session Detector",
347
+ "schedule_cron": "*/10 * * * *",
348
+ "session_target": "shell",
349
+ "payload_message": "node ~/.openclaw/scheduler/dispatch/index.mjs stuck --threshold-min 15",
350
+ "delivery_mode": "announce",
351
+ "delivery_channel": "telegram",
352
+ "delivery_to": "YOUR_CHAT_ID"
353
+ }'
354
+ ```
355
+
356
+ ---
357
+
358
+ ## Migration from Scheduler-DB Version
359
+
360
+ If upgrading from the scheduler-DB version:
361
+
362
+ 1. Replace the old `index.mjs` with the `index.mjs` from this directory
363
+ 2. `hooks.mjs` is unchanged (no DB imports)
364
+ 3. `labels.json` is created automatically on first `enqueue`
365
+ 4. Old scheduler jobs for dispatch tasks can be removed
366
+ 5. The scheduler DB is no longer needed for dispatch
367
+
368
+ The CLI flags are identical — existing scripts/agents calling dispatch
369
+ don't need changes (except `--mode auto` is gone; use `fresh` or `reuse`).
370
+
371
+ New additions: `steer` subcommand (alias for `send`), `list` subcommand,
372
+ `--model` flag on `enqueue`.