openclaw-scheduler 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +302 -0
- package/BEST-PRACTICES.md +506 -0
- package/CHANGELOG.md +82 -0
- package/CODE_OF_CONDUCT.md +22 -0
- package/CONTEXT.md +26 -0
- package/CONTRIBUTING.md +73 -0
- package/IMPLEMENTATION_SPEC.md +170 -0
- package/INSTALL-ADDITIONAL-HOST.md +333 -0
- package/INSTALL-LINUX.md +419 -0
- package/INSTALL-WINDOWS.md +305 -0
- package/INSTALL.md +364 -0
- package/JOB-QUICK-REF.md +222 -0
- package/LICENSE +21 -0
- package/QUICK-START.md +256 -0
- package/README.md +2170 -0
- package/SECURITY.md +34 -0
- package/UNINSTALL.md +129 -0
- package/UPGRADING.md +436 -0
- package/agents.js +67 -0
- package/approval.js +107 -0
- package/backup.js +390 -0
- package/bin/openclaw-scheduler.js +138 -0
- package/cli.js +1083 -0
- package/db.js +122 -0
- package/dispatch/529-recovery.mjs +204 -0
- package/dispatch/README.md +372 -0
- package/dispatch/config.example.json +24 -0
- package/dispatch/deliver-watcher.sh +57 -0
- package/dispatch/hooks.mjs +171 -0
- package/dispatch/index.mjs +1836 -0
- package/dispatch/watcher.mjs +1396 -0
- package/dispatch-queue.js +112 -0
- package/dispatcher-approvals.js +96 -0
- package/dispatcher-delivery.js +43 -0
- package/dispatcher-maintenance.js +242 -0
- package/dispatcher-shell.js +29 -0
- package/dispatcher-strategies.js +1280 -0
- package/dispatcher-utils.js +81 -0
- package/dispatcher.js +855 -0
- package/docs/adr-schedule-ownership.md +73 -0
- package/docs/gateway-contract.md +904 -0
- package/docs/plans/2026-03-09-fix-typescript-types.md +91 -0
- package/docs/plans/2026-03-09-test-coverage-gaps.md +83 -0
- package/docs/plans/2026-03-10-dispatcher-refactor.md +801 -0
- package/docs/trust-architecture.md +266 -0
- package/gateway.js +473 -0
- package/idempotency.js +119 -0
- package/index.d.ts +864 -0
- package/index.js +17 -0
- package/jobs.js +1224 -0
- package/messages.js +357 -0
- package/migrate-consolidate.js +694 -0
- package/migrate.js +125 -0
- package/package.json +130 -0
- package/paths.js +79 -0
- package/prompt-context.js +94 -0
- package/retrieval.js +176 -0
- package/runs.js +270 -0
- package/scheduler-schema.js +101 -0
- package/schema.sql +480 -0
- package/scripts/dispatch-cli-utils.mjs +65 -0
- package/scripts/inbox-consumer.mjs +288 -0
- package/scripts/stuck-detector.sh +18 -0
- package/scripts/stuck-run-detector.mjs +333 -0
- package/scripts/telegram-webhook-check.mjs +238 -0
- package/setup.mjs +724 -0
- package/shell-result.js +214 -0
- package/task-tracker.js +300 -0
- package/team-adapter.js +335 -0
- package/v02-runtime.js +599 -0
package/db.js
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
// Database layer -- SQLite via better-sqlite3
|
|
2
|
+
import Database from 'better-sqlite3';
|
|
3
|
+
import { readFileSync } from 'fs';
|
|
4
|
+
import { join, dirname } from 'path';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
import { ensureSchedulerDbParent, resolveSchedulerDbPath } from './paths.js';
|
|
7
|
+
|
|
8
|
+
// Directory that holds this module; initDb() reads schema.sql from here.
const thisModuleFile = fileURLToPath(import.meta.url);
const __dirname = dirname(thisModuleFile);

// Shared connection, opened lazily by getDb() and cleared by closeDb()/setDbPath().
let _db;

// Path override installed by setDbPath(); while unset the path is resolved from the environment.
let _dbPath;
|
|
12
|
+
|
|
13
|
+
/**
 * Point this module at a different database location.
 *
 * Intended to be called before getDb()/initDb(). If a connection is already
 * open it is closed and forgotten, so the next getDb() call opens the new
 * location instead.
 *
 * @param {string} path - Database file path, or ':memory:' for an in-memory
 *   test database.
 */
export function setDbPath(path) {
  const hasOpenConnection = Boolean(_db);
  if (hasOpenConnection) {
    _db.close();
    _db = null;
  }

  _dbPath = path;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Return the shared database connection, opening and configuring it on first use.
 *
 * The path is the override set through setDbPath() when there is one,
 * otherwise whatever resolveSchedulerDbPath() derives from the environment.
 * File-backed databases get their parent directory ensured and are switched to
 * WAL journaling; ':memory:' databases skip both steps. Every connection gets
 * a 5000 ms busy timeout and foreign-key enforcement.
 *
 * The connection is cached only after all pragmas succeeded. If configuration
 * fails, the handle is closed and the error is rethrown, so a later call
 * retries from scratch instead of silently handing out a connection that is
 * missing foreign-key enforcement or the busy timeout.
 *
 * @returns {object} The open better-sqlite3 Database instance.
 * @throws Whatever opening or configuring the database throws.
 */
export function getDb() {
  if (_db) return _db;

  const dbPath = _dbPath || resolveSchedulerDbPath({ env: process.env });
  const isInMemory = dbPath === ':memory:';
  if (!isInMemory) ensureSchedulerDbParent(dbPath);

  const db = new Database(dbPath);
  try {
    if (!isInMemory) db.pragma('journal_mode = WAL');
    db.pragma('busy_timeout = 5000');
    db.pragma('foreign_keys = ON');
  } catch (err) {
    // Never cache (or leak) a half-configured handle.
    try {
      db.close();
    } catch {
      // The configuration error below is the one worth reporting.
    }
    throw err;
  }

  _db = db;
  return _db;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Report the database path getDb() would use, without opening anything.
 *
 * @returns {string} The setDbPath() override when one is set, otherwise the
 *   path resolved from the current environment.
 */
export function getResolvedDbPath() {
  if (_dbPath) {
    return _dbPath;
  }
  return resolveSchedulerDbPath({ env: process.env });
}
|
|
37
|
+
|
|
38
|
+
/**
 * Open the database and bring its schema up to date.
 *
 * Two paths, chosen by whether any non-internal table already exists:
 *  - existing install: run the consolidation migration first, then apply
 *    schema.sql once, so the re-apply does not trip over legacy partial
 *    tables/indexes;
 *  - new install: apply schema.sql, run the consolidation migration (it may
 *    carry idempotent backfills), then apply schema.sql again so table and
 *    index definitions are aligned afterwards.
 *
 * Schema and migration failures are best-effort: they are written to stderr
 * and do not reject the returned promise.
 *
 * @returns {Promise<object>} The shared database connection from getDb().
 */
export async function initDb() {
  const db = getDb();
  const schemaSql = readFileSync(join(__dirname, 'schema.sql'), 'utf8');

  const tableCountRow = db.prepare(`
    SELECT COUNT(*) AS cnt
    FROM sqlite_master
    WHERE type = 'table'
      AND name NOT LIKE 'sqlite_%'
  `).get();
  const isExistingInstall = (tableCountRow?.cnt ?? 0) > 0;

  // Executes schema.sql; a failure is logged under `label` and reported as false.
  const applySchema = (label) => {
    try {
      db.exec(schemaSql);
    } catch (err) {
      process.stderr.write(`${new Date().toISOString()} [db] ${label}: ${err.message}\n`);
      return false;
    }
    return true;
  };

  // Loads and runs the consolidation migration; any failure is only logged.
  const runConsolidate = async () => {
    try {
      const { default: consolidate } = await import('./migrate-consolidate.js');
      if (consolidate()) {
        process.stderr.write(`${new Date().toISOString()} [db] Consolidation migration applied\n`);
      }
    } catch (err) {
      process.stderr.write(`${new Date().toISOString()} [db] migrate-consolidate error: ${err.message}\n`);
    }
  };

  if (!isExistingInstall) {
    // Net-new install: baseline schema, consolidation, then schema once more.
    applySchema('Initial schema apply warning');
    await runConsolidate();
    applySchema('Schema re-apply warning');
    return db;
  }

  // Existing install: normalize via migration before re-applying the schema.
  await runConsolidate();
  applySchema('Schema apply warning');
  return db;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Run a PASSIVE WAL checkpoint on the open connection. Meant to be called
 * periodically so less data sits only in the WAL if the process is killed.
 *
 * @returns {object|null} The first row reported by the checkpoint pragma, or
 *   null when no connection is open, the pragma reports nothing, or it throws
 *   (the error is written to stderr).
 */
export function checkpointWal() {
  if (!_db) {
    return null;
  }

  let rows;
  try {
    rows = _db.pragma('wal_checkpoint(PASSIVE)');
  } catch (err) {
    const ts = new Date().toISOString();
    process.stderr.write(`${ts} [db] WAL checkpoint error: ${err.message}\n`);
    return null;
  }

  const firstRow = rows?.[0];
  return firstRow || null;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Close the shared connection, if one is open.
 *
 * A TRUNCATE WAL checkpoint is attempted first so pending WAL content is
 * written into the main database file; its outcome (or failure) is logged to
 * stderr. The connection is then closed and forgotten, so a later getDb()
 * opens a fresh one.
 */
export function closeDb() {
  if (!_db) return;

  try {
    // Checkpoint before closing to avoid leaving data only in the WAL.
    const rows = _db.pragma('wal_checkpoint(TRUNCATE)');
    const ts = new Date().toISOString();
    const stats = rows ? rows[0] : undefined;
    const outcome = stats
      ? `busy=${stats.busy}, checkpointed=${stats.checkpointed}, log=${stats.log}`
      : 'ok';
    process.stderr.write(`${ts} [db] WAL checkpoint on close: ${outcome}\n`);
  } catch (err) {
    const ts = new Date().toISOString();
    process.stderr.write(`${ts} [db] WAL checkpoint failed on close: ${err.message}\n`);
  }

  _db.close();
  _db = null;
}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* dispatch 529 recovery -- scheduler safety net for 529/overload errors.
|
|
4
|
+
*
|
|
5
|
+
* Scans labels.json for sessions in 'error' state with 529/overload patterns.
|
|
6
|
+
* If retryCount < MAX_RETRIES and the watcher hasn't already handled it,
|
|
7
|
+
* re-enqueues the session.
|
|
8
|
+
*
|
|
9
|
+
* Idempotency:
|
|
10
|
+
* - Checks retryCount + lastRetryAt to avoid double-retrying if the watcher
|
|
11
|
+
* already handled it (watcher updates retryCount and status immediately).
|
|
12
|
+
* - If status is already 'running', skip (watcher handled it).
|
|
13
|
+
* - If retryCount >= MAX, skip (already exhausted).
|
|
14
|
+
*
|
|
15
|
+
* Run by scheduler every 10 minutes as a safety net.
|
|
16
|
+
*
|
|
17
|
+
* Exit codes:
|
|
18
|
+
* 0 -- all good (nothing to retry, or retries dispatched)
|
|
19
|
+
* 1 -- error
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { readFileSync, writeFileSync, renameSync } from 'fs';
|
|
23
|
+
import { execFileSync } from 'child_process';
|
|
24
|
+
import { dirname, join } from 'path';
|
|
25
|
+
import { fileURLToPath } from 'url';
|
|
26
|
+
|
|
27
|
+
// Directory of this script; the defaults below are resolved against it.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Ledger file and dispatch CLI entry point, each overridable via the environment.
const LABELS_PATH = process.env.DISPATCH_LABELS_PATH || join(__dirname, 'labels.json');
const INDEX_PATH = process.env.DISPATCH_INDEX_PATH || join(__dirname, 'index.mjs');

// Retry attempts allowed per label before the safety net gives up.
const MAX_RETRIES = 3;

// Errors last updated more than 60 minutes ago are left alone
// (ancient failures are not revived).
const MAX_ERROR_AGE_MS = 3600 * 1000;

// Entries updated within the last 5 minutes are skipped so the watcher
// gets the first chance to handle them.
const MIN_SINCE_LAST_UPDATE_MS = 300 * 1000;
|
|
38
|
+
|
|
39
|
+
// Message fragments that identify a transient overload failure.
const OVERLOAD_PATTERNS = [
  // The status code 529 on its own. The digit guards stop the pattern from
  // firing on longer numbers that merely contain "529" (ports, ids, timestamps).
  /(?<!\d)529(?!\d)/,
  /failover\s*error/i,
  /overload/i,
  // The next two are already covered by /overload/i; kept as explicit documentation
  // of the phrasings seen in practice.
  /temporarily\s+overloaded/i,
  /service.*overloaded/i,
];

/**
 * Decide whether an error message describes a 529/overload failure.
 *
 * @param {unknown} errorMsg - Error text stored on a ledger entry.
 * @returns {boolean} true when errorMsg is a non-empty string matching one of
 *   OVERLOAD_PATTERNS; false for anything else, including non-strings.
 */
function is529Error(errorMsg) {
  if (typeof errorMsg !== 'string' || errorMsg === '') return false;
  return OVERLOAD_PATTERNS.some((pattern) => pattern.test(errorMsg));
}
|
|
51
|
+
|
|
52
|
+
/**
 * Read the labels ledger from LABELS_PATH.
 *
 * Always returns a plain object so callers can iterate it safely. A missing
 * file is the normal "no sessions yet" case and yields {} silently. Any other
 * read failure, unparseable JSON, or JSON that is not an object (null, array,
 * scalar) also yields {}, but is reported on stderr so a damaged ledger does
 * not go unnoticed.
 *
 * @returns {object} Parsed ledger keyed by label, or {} when unavailable.
 */
function loadLabels() {
  let raw;
  try {
    raw = readFileSync(LABELS_PATH, 'utf-8');
  } catch (err) {
    if (err?.code !== 'ENOENT') {
      process.stderr.write(`[529-recovery] cannot read ${LABELS_PATH}: ${err?.message}\n`);
    }
    return {};
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    process.stderr.write(`[529-recovery] cannot parse ${LABELS_PATH}: ${err.message}\n`);
    return {};
  }

  const isPlainRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  if (!isPlainRecord) {
    process.stderr.write(`[529-recovery] ignoring ${LABELS_PATH}: expected a JSON object\n`);
    return {};
  }
  return parsed;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Persist the labels ledger to LABELS_PATH.
 *
 * The JSON (2-space indent, trailing newline) is written to a per-process
 * temp file next to the ledger and then renamed over it, so the ledger path
 * is replaced in a single rename rather than rewritten in place.
 *
 * @param {object} labels - Complete ledger to store.
 */
function saveLabels(labels) {
  const tempPath = `${LABELS_PATH}.tmp.${process.pid}`;
  const serialized = `${JSON.stringify(labels, null, 2)}\n`;
  writeFileSync(tempPath, serialized);
  renameSync(tempPath, LABELS_PATH);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Best-effort notification through the scheduler CLI
 * (`cli.js msg send scheduler main <message>`), run synchronously with a
 * 10 second timeout. Any failure is ignored on purpose: a notification
 * problem must not abort the recovery scan.
 *
 * @param {string} message - Text to send.
 */
function notify(message) {
  const spawnOptions = {
    encoding: 'utf-8',
    timeout: 10000,
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  try {
    const schedulerCli = join(__dirname, '..', 'cli.js');
    const cliArgs = [schedulerCli, 'msg', 'send', 'scheduler', 'main', message];
    execFileSync(process.execPath, cliArgs, spawnOptions);
  } catch {
    // Intentionally ignored: notifications are advisory only.
  }
}
|
|
76
|
+
|
|
77
|
+
/**
 * Re-dispatch a session that failed with an overload error.
 *
 * First tries `send` on the dispatch CLI to continue the existing session.
 * If that command fails, falls back to `enqueue --mode fresh`, carrying over
 * the model, thinking, origin and (when a delivery target exists) delivery
 * settings recorded on the ledger entry. Each CLI call runs synchronously
 * with a 30 second timeout.
 *
 * @param {string} label - Ledger label of the session.
 * @param {object} [entry] - Ledger entry supplying the optional settings.
 * @returns {'send'|'fresh'|null} Which method succeeded, or null if both failed.
 */
function respawnSession(label, entry) {
  const continuationMsg = `[Auto-retry after 529 overload -- scheduler safety net] This is an automatic retry. Please continue your previous task from where you left off.`;
  const spawnOptions = {
    encoding: 'utf-8',
    timeout: 30000,
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  // Runs the dispatch CLI with the given arguments; true when it exits cleanly.
  const dispatchSucceeds = (cliArgs) => {
    try {
      execFileSync(process.execPath, [INDEX_PATH, ...cliArgs], spawnOptions);
      return true;
    } catch {
      return false;
    }
  };

  // Preferred path: reuse the existing session.
  if (dispatchSucceeds(['send', '--label', label, '--message', continuationMsg])) {
    return 'send';
  }

  // Fallback: start a fresh session with the settings known from the ledger.
  const enqueueArgs = ['enqueue', '--label', label, '--message', continuationMsg, '--mode', 'fresh'];
  const optionalFlags = [
    ['--model', entry?.model],
    ['--thinking', entry?.thinking],
    ['--origin', entry?.origin],
  ];
  if (entry?.deliverTo) {
    // Delivery mode/channel are only meaningful together with a target.
    optionalFlags.push(
      ['--deliver-to', entry.deliverTo],
      ['--delivery-mode', entry?.deliveryMode],
      ['--deliver-channel', entry?.deliverChannel],
    );
  }
  for (const [flag, value] of optionalFlags) {
    if (value) enqueueArgs.push(flag, value);
  }

  return dispatchSucceeds(enqueueArgs) ? 'fresh' : null;
}
|
|
121
|
+
|
|
122
|
+
// -- Main ----------------------------------------------------
|
|
123
|
+
|
|
124
|
+
// Scan the ledger once, retry eligible overload failures, then report.
const labels = loadLabels();
const now = Date.now();
const results = [];

for (const [name, entry] of Object.entries(labels ?? {})) {
  // Malformed rows (null, scalars) cannot describe an error-state session.
  if (!entry || typeof entry !== 'object') continue;

  // Only look at error-state sessions with an overload-style error.
  if (entry.status !== 'error') continue;
  if (!is529Error(entry.error || '')) continue;

  // Age gate. A missing or unparseable timestamp counts as "infinitely old"
  // so such entries are never revived (NaN would otherwise pass both gates).
  const parsedUpdatedAt = entry.updatedAt ? new Date(entry.updatedAt).getTime() : 0;
  const updatedAt = Number.isNaN(parsedUpdatedAt) ? 0 : parsedUpdatedAt;
  const errorAge = now - updatedAt;
  if (errorAge > MAX_ERROR_AGE_MS) {
    results.push({ label: name, action: 'skip', reason: `error too old (${Math.round(errorAge / 60000)}min)` });
    continue;
  }

  // Recently updated entries are left to the watcher.
  if (errorAge < MIN_SINCE_LAST_UPDATE_MS) {
    results.push({ label: name, action: 'skip', reason: `updated ${Math.round(errorAge / 1000)}s ago (watcher may be handling)` });
    continue;
  }

  const retryCount = entry.retryCount || 0;
  if (retryCount >= MAX_RETRIES) {
    results.push({ label: name, action: 'skip', reason: `max retries exhausted (${retryCount}/${MAX_RETRIES})` });
    continue;
  }

  // Claim the retry against a fresh copy of the ledger. Earlier retries in
  // this run block for up to 30s per CLI call, so the initial snapshot may be
  // stale: if the entry vanished, left the error state, or its retryCount
  // moved, someone else already handled it and retrying again would
  // double-dispatch the session.
  const freshLabels = loadLabels();
  const freshEntry = freshLabels?.[name];
  const stillClaimable = Boolean(freshEntry)
    && typeof freshEntry === 'object'
    && freshEntry.status === 'error'
    && (freshEntry.retryCount || 0) === retryCount;
  if (!stillClaimable) {
    results.push({ label: name, action: 'skip', reason: 'ledger entry changed since scan (already handled)' });
    continue;
  }

  const newRetryCount = retryCount + 1;
  process.stderr.write(`[529-recovery] retrying [${name}] (attempt ${newRetryCount}/${MAX_RETRIES})\n`);

  // Record the attempt before dispatching so a concurrent run sees the claim.
  freshEntry.retryCount = newRetryCount;
  freshEntry.updatedAt = new Date().toISOString();
  saveLabels(freshLabels);

  const method = respawnSession(name, entry);
  if (method) {
    // Mark as running on the latest ledger (the dispatch CLI may have rewritten it).
    const updated = loadLabels();
    if (updated?.[name]) {
      updated[name].status = 'running';
      updated[name].error = null;
      updated[name].updatedAt = new Date().toISOString();
      saveLabels(updated);
    }
    notify(`🌶️ Dispatch 529 recovery: [${name}] retried (${newRetryCount}/${MAX_RETRIES}) via ${method}`);
    results.push({ label: name, action: 'retried', method, retryCount: newRetryCount });
  } else {
    notify(`🌶️ Dispatch 529 recovery: [${name}] retry FAILED (${newRetryCount}/${MAX_RETRIES})`);
    results.push({ label: name, action: 'retry_failed', retryCount: newRetryCount });
  }
}

// Output summary -- scheduler delivers stdout if non-empty and delivery_mode=announce
if (results.length > 0) {
  const retried = results.filter((r) => r.action === 'retried');
  const skipped = results.filter((r) => r.action === 'skip');
  const failed = results.filter((r) => r.action === 'retry_failed');

  const lines = [];
  if (retried.length) lines.push(`✅ Retried: ${retried.map((r) => r.label).join(', ')}`);
  if (failed.length) lines.push(`❌ Failed: ${failed.map((r) => r.label).join(', ')}`);
  if (skipped.length) lines.push(`⏭️ Skipped: ${skipped.map((r) => `${r.label} (${r.reason})`).join(', ')}`);

  // Only produce stdout (which triggers delivery) if we actually retried or failed something
  if (retried.length || failed.length) {
    process.stdout.write(`🌶️ 529 Recovery:\n${lines.join('\n')}\n`);
  } else {
    process.stderr.write(`[529-recovery] scan complete: ${skipped.length} skipped\n`);
  }
} else {
  process.stderr.write('[529-recovery] scan complete: no 529 errors found\n');
}
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
# dispatch
|
|
2
|
+
|
|
3
|
+
**Sub-agent dispatch CLI for OpenClaw — native gateway API edition.**
|
|
4
|
+
|
|
5
|
+
dispatch spawns and steers isolated agent sessions directly via the OpenClaw
|
|
6
|
+
Gateway API. It tracks label→session mappings in a local JSON ledger, giving
|
|
7
|
+
you a simple CLI to dispatch work, check on it, steer it mid-run, and get
|
|
8
|
+
results back.
|
|
9
|
+
|
|
10
|
+
No scheduler DB dependency. No dispatcher tick delay. Sessions start instantly.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Files
|
|
15
|
+
|
|
16
|
+
| File | Purpose |
|
|
17
|
+
|---|---|
|
|
18
|
+
| `index.mjs` | CLI entry point — 10 subcommands |
|
|
19
|
+
| `hooks.mjs` | Lifecycle event emitter (Loki + optional HTTP webhook) |
|
|
20
|
+
| `watcher.mjs` | Delivery monitoring process |
|
|
21
|
+
| `529-recovery.mjs` | Transient error recovery |
|
|
22
|
+
| `deliver-watcher.sh` | Shell wrapper for result retrieval |
|
|
23
|
+
| `chilisaus.mjs` | Branded wrapper |
|
|
24
|
+
| `config.example.json` | Example config |
|
|
25
|
+
| `test-done-postoffice.mjs` | Done handler test |
|
|
26
|
+
| `labels.json` | Local label→session ledger (gitignored) |
|
|
27
|
+
| `README.md` | This file |
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## How it works
|
|
32
|
+
|
|
33
|
+
dispatch calls the OpenClaw Gateway RPC API directly:
|
|
34
|
+
|
|
35
|
+
1. **`sessions.patch`** — configure the session (model, thinking level, spawn depth)
|
|
36
|
+
2. **`agent`** — send a message into the session (spawning it if new)
|
|
37
|
+
3. **`sessions.list`** — query session status and liveness
|
|
38
|
+
4. **`chat.history`** — read session transcripts for results
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
Orchestrator calls:
|
|
42
|
+
dispatch enqueue --label ticket-42 --message "Fix the deploy script"
|
|
43
|
+
|
|
44
|
+
→ Creates session key: agent:main:subagent:<uuid>
|
|
45
|
+
→ Patches session with model/thinking/spawnDepth
|
|
46
|
+
→ Calls gateway `agent` method with the task
|
|
47
|
+
→ Session starts immediately (no scheduler tick delay)
|
|
48
|
+
→ Tracks label→sessionKey in labels.json
|
|
49
|
+
→ Agent auto-announces results on completion
|
|
50
|
+
→ hooks.mjs fires dispatch.started to Loki
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Subcommands
|
|
56
|
+
|
|
57
|
+
### `enqueue` — spawn a new session
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
node dispatch/index.mjs enqueue \
|
|
61
|
+
--label "ticket-42" \
|
|
62
|
+
--message "Fix the deploy script" \
|
|
63
|
+
--mode fresh \
|
|
64
|
+
--agent main \
|
|
65
|
+
--model anthropic/claude-sonnet-4-6 \
|
|
66
|
+
--thinking high \
|
|
67
|
+
--timeout 300 \
|
|
68
|
+
--deliver-to YOUR_CHAT_ID \
|
|
69
|
+
--deliver-channel telegram \
|
|
70
|
+
--delivery-mode announce
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
| Flag | Default | Description |
|
|
74
|
+
|---|---|---|
|
|
75
|
+
| `--label` | required | Human name — used for lookup/reuse |
|
|
76
|
+
| `--message` | required | Prompt sent to the agent |
|
|
77
|
+
| `--mode` | `fresh` | `fresh` = new session; `reuse` = continue last session for this label |
|
|
78
|
+
| `--session-key` | — | Explicit session key (bypasses ledger lookup) |
|
|
79
|
+
| `--agent` | `main` | Agent ID |
|
|
80
|
+
| `--model` | — | Model override (e.g. `anthropic/claude-sonnet-4-6`) |
|
|
81
|
+
| `--thinking` | — | Reasoning level: `low`, `high`, `xhigh` |
|
|
82
|
+
| `--timeout` | `300` | Seconds before run times out |
|
|
83
|
+
| `--deliver-to` | — | Delivery target (chat ID, channel ID, handle, etc.). Enables `deliver:true` on the gateway call |
|
|
84
|
+
| `--deliver-channel` | `telegram` | Delivery channel for `--deliver-to` (telegram, slack, etc.) |
|
|
85
|
+
| `--delivery-mode` | `announce` | `announce`, `announce-always`, `none` |
|
|
86
|
+
| `--origin` | -- | Dispatch origin (e.g. `telegram:12345`) |
|
|
87
|
+
| `--no-monitor` | false | Skip watcher monitoring |
|
|
88
|
+
| `--monitor-interval` | -- | Watcher cron expression |
|
|
89
|
+
| `--monitor-timeout` | -- | Watcher timeout in minutes |
|
|
90
|
+
| `--verify-cmd` | -- | Post-completion verification command |
|
|
91
|
+
|
|
92
|
+
### `status` — session status for a label
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
node dispatch/index.mjs status --label "ticket-42"
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Returns ledger info + live session data from gateway (model, age, token usage).
|
|
99
|
+
|
|
100
|
+
### `stuck` — find stuck running sessions
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
node dispatch/index.mjs stuck --threshold-min 15
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Exit 0 = nothing stuck (silent).
|
|
107
|
+
Exit 1 = stuck sessions found (triggers announce delivery).
|
|
108
|
+
|
|
109
|
+
Checks labels.json for sessions marked `running`, cross-references gateway
|
|
110
|
+
session store for last activity timestamp.
|
|
111
|
+
|
|
112
|
+
### `result` — last assistant reply from a session
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
node dispatch/index.mjs result --label "ticket-42"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Reads the session transcript via `chat.history` and returns the last assistant
|
|
119
|
+
message.
|
|
120
|
+
|
|
121
|
+
### `done` — mark a tracked session complete
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
node dispatch/index.mjs done \
|
|
125
|
+
--label "ticket-42" \
|
|
126
|
+
--summary "Work complete" \
|
|
127
|
+
--checklist '{"work_complete":true}'
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Marks the label as `done` immediately so the watcher can resolve the run without
|
|
131
|
+
waiting for timeout polling.
|
|
132
|
+
|
|
133
|
+
| Flag | Default | Description |
|
|
134
|
+
|---|---|---|
|
|
135
|
+
| `--label` | required | Label to mark complete |
|
|
136
|
+
| `--summary` | `completed (agent signal)` | One-line completion summary |
|
|
137
|
+
| `--checklist` | required | JSON object. Must include `work_complete:true`; optional fields like `tests_passed` and `pushed` may not be `false` |
|
|
138
|
+
| `--sha` | — | Required when the stored task prompt includes real git commands like `git push`, `git rebase`, `git cherry-pick`, `--force-with-lease`, or `--force-push` |
|
|
139
|
+
| `--force-done` | false | Override the minimum-runtime guard for legitimate short tasks |
|
|
140
|
+
| `--reason` | — | Required with `--force-done`; records why an unusually short session is still valid |
|
|
141
|
+
| `--skip-activity-check` | false | Bypass the gateway message-count heuristic when that check is too strict for the task |
|
|
142
|
+
|
|
143
|
+
Notes:
|
|
144
|
+
- The minimum runtime guard rejects very short sessions unless `--force-done --reason ...` is provided.
|
|
145
|
+
- Older labels created before `taskPrompt` storage will warn and skip the git-SHA gate.
|
|
146
|
+
- Gateway activity checks fail open: if the session API is unavailable, `done` logs a warning and continues.
|
|
147
|
+
|
|
148
|
+
### `send` — message a running session
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
node dispatch/index.mjs send \
|
|
152
|
+
--label "ticket-42" \
|
|
153
|
+
--message "Tests still failing on line 42, focus on the edge case"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Sends a message directly into the running session. The agent sees it as a new
|
|
157
|
+
user turn and continues working. This is the **mid-session steering superpower**.
|
|
158
|
+
|
|
159
|
+
### `steer` — alias for send
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
node dispatch/index.mjs steer \
|
|
163
|
+
--label "ticket-42" \
|
|
164
|
+
--message "Change approach: use the new API instead"
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Identical to `send`. The name makes intent explicit.
|
|
168
|
+
|
|
169
|
+
### `heartbeat` — check session liveness
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
node dispatch/index.mjs heartbeat --label "ticket-42"
|
|
173
|
+
# or:
|
|
174
|
+
node dispatch/index.mjs heartbeat --session-key "agent:main:subagent:..."
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Returns whether the session is alive (updated within the last 10 minutes),
|
|
178
|
+
plus session metadata.
|
|
179
|
+
|
|
180
|
+
### `list` — list all tracked labels
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
node dispatch/index.mjs list [--status running] [--limit 10]
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Shows all labels in the ledger, sorted by most recent. Filter by status.
|
|
187
|
+
|
|
188
|
+
### `sync` -- reconcile labels with sessions store
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
node dispatch/index.mjs sync
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Reconciles `labels.json` with the gateway sessions store. Sessions that no
|
|
195
|
+
longer exist on the gateway are marked stale, and sessions present on the
|
|
196
|
+
gateway but missing from the ledger are imported. Useful after gateway restarts
|
|
197
|
+
or manual session cleanup.
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Session Reuse
|
|
202
|
+
|
|
203
|
+
`--mode reuse` looks up the last session key for this label in `labels.json`
|
|
204
|
+
and sends the new message into that existing session. The agent picks up where
|
|
205
|
+
it left off with full conversation history.
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
# First run — fresh session
|
|
209
|
+
node dispatch/index.mjs enqueue --label "daily-report" --message "Generate today's report"
|
|
210
|
+
|
|
211
|
+
# Later — continue in the same session
|
|
212
|
+
node dispatch/index.mjs enqueue --label "daily-report" --message "Add the Q4 numbers" --mode reuse
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## Labels Ledger (`labels.json`)
|
|
218
|
+
|
|
219
|
+
Local JSON file mapping labels to session keys:
|
|
220
|
+
|
|
221
|
+
```json
|
|
222
|
+
{
|
|
223
|
+
"ticket-42": {
|
|
224
|
+
"sessionKey": "agent:main:subagent:9131309b-...",
|
|
225
|
+
"runId": "46030a3d-...",
|
|
226
|
+
"agent": "main",
|
|
227
|
+
"mode": "fresh",
|
|
228
|
+
"model": null,
|
|
229
|
+
"thinking": null,
|
|
230
|
+
"spawnedAt": "2026-03-01T04:27:52.181Z",
|
|
231
|
+
"status": "running",
|
|
232
|
+
"summary": null,
|
|
233
|
+
"error": null,
|
|
234
|
+
"updatedAt": "2026-03-01T04:27:52.182Z"
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Gitignored by default. Session-local, not shared.
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Lifecycle Hooks (`hooks.mjs`)
|
|
244
|
+
|
|
245
|
+
Fires structured events to Loki and/or an HTTP webhook:
|
|
246
|
+
|
|
247
|
+
| Event | When |
|
|
248
|
+
|---|---|
|
|
249
|
+
| `dispatch.started` | Session spawned |
|
|
250
|
+
| `dispatch.finished` | Session completed |
|
|
251
|
+
| `dispatch.stuck` | `stuck` subcommand found stuck sessions |
|
|
252
|
+
|
|
253
|
+
**Configuration:**
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
export LOKI_PUSH_URL=http://your-loki-host/loki/api/v1/push
|
|
257
|
+
export DISPATCH_WEBHOOK_URL=https://your-endpoint.example.com/hook
|
|
258
|
+
export DISPATCH_HOST=my-agent-host
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## Gateway Auth
|
|
264
|
+
|
|
265
|
+
dispatch reads the gateway token from:
|
|
266
|
+
1. `OPENCLAW_GATEWAY_TOKEN` environment variable
|
|
267
|
+
2. `~/.openclaw/openclaw.json` → `gateway.auth.token`
|
|
268
|
+
|
|
269
|
+
No manual token configuration needed on a standard OpenClaw install.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Delivery
|
|
274
|
+
|
|
275
|
+
### How it works
|
|
276
|
+
|
|
277
|
+
When `--deliver-to` is set, dispatch registers a **scheduler watcher job**
|
|
278
|
+
after dispatching the session. The watcher polls the session result every
|
|
279
|
+
minute until the agent produces a reply, then delivers via the scheduler's
|
|
280
|
+
`handleDelivery` pipeline.
|
|
281
|
+
|
|
282
|
+
```
|
|
283
|
+
dispatch enqueue --deliver-to <telegram-user-id>
|
|
284
|
+
-> gateway agent call (deliver: false, fire-and-forget)
|
|
285
|
+
-> scheduler job: <brand>-deliver:<label> (run_now: true, shell, one-shot)
|
|
286
|
+
-> watcher.mjs: long-running blocking process polls session status
|
|
287
|
+
-> on success (exit 0): scheduler delivers output to telegram/<telegram-user-id>
|
|
288
|
+
-> job auto-prunes via ttl_hours (default 48h)
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
**Why scheduler instead of gateway `deliver:true`?**
|
|
292
|
+
- Retry / at-least-once delivery guarantee
|
|
293
|
+
- Delivery aliases (scheduler resolves `@team_room` → channel/target)
|
|
294
|
+
- Audit trail (runs table records every attempt)
|
|
295
|
+
- Chain triggers (completion can fire child jobs)
|
|
296
|
+
- Resilient to gateway restarts mid-run
|
|
297
|
+
|
|
298
|
+
### Watcher script
|
|
299
|
+
|
|
300
|
+
`deliver-watcher.sh` checks the session result. Exit 0 with output = deliver.
|
|
301
|
+
Exit 1 with no output = retry on next cron tick (no spam — `announce-always`
|
|
302
|
+
only delivers when `output.trim()` is truthy).
|
|
303
|
+
|
|
304
|
+
### Progress check-ins from subagent sessions
|
|
305
|
+
|
|
306
|
+
Subagent sessions run without PATH access to the `openclaw` CLI, so
|
|
307
|
+
`openclaw system event` silently fails. For mid-task progress updates,
|
|
308
|
+
use the gateway HTTP API via curl:
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
GW_TOKEN=$(python3 -c "import json, os; print(json.load(open(os.path.expanduser('~/.openclaw/openclaw.json')))['gateway']['auth']['token'])")
|
|
312
|
+
curl -s -X POST http://127.0.0.1:18789/tools/invoke \
|
|
313
|
+
-H 'Content-Type: application/json' \
|
|
314
|
+
-H "Authorization: Bearer $GW_TOKEN" \
|
|
315
|
+
-d '{"tool":"message","args":{"action":"send","channel":"telegram","target":"<telegram-user-id>","message":"<label>: <progress update>"},"sessionKey":"main"}'
|
|
316
|
+
```
|
|
317
|
+
---
|
|
318
|
+
|
|
319
|
+
## Architecture: Before & After
|
|
320
|
+
|
|
321
|
+
### Before (scheduler DB dispatch)
|
|
322
|
+
```
|
|
323
|
+
dispatch enqueue → creates job in scheduler DB → dispatcher picks up on tick
|
|
324
|
+
→ runs as isolated session → announces result → hooks fire
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
### After (native gateway API)
|
|
328
|
+
```
|
|
329
|
+
dispatch enqueue → calls gateway API directly → session starts immediately
|
|
330
|
+
→ tracks in labels.json → announces result → hooks fire
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
Key improvements:
|
|
334
|
+
- **Instant dispatch** — no scheduler tick delay (was up to 10s)
|
|
335
|
+
- **Mid-session steering** — `send`/`steer` inject messages into running sessions
|
|
336
|
+
- **No DB dependency** — labels.json is a simple JSON file
|
|
337
|
+
- **Session reuse** — `--mode reuse` continues conversations
|
|
338
|
+
- **Simpler** -- lightweight multi-file CLI vs full DB schema + dispatcher integration
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## Stuck Run Detector (cron job)
|
|
343
|
+
|
|
344
|
+
```bash
|
|
345
|
+
openclaw-scheduler jobs add '{
|
|
346
|
+
"name": "Stuck Session Detector",
|
|
347
|
+
"schedule_cron": "*/10 * * * *",
|
|
348
|
+
"session_target": "shell",
|
|
349
|
+
"payload_message": "node ~/.openclaw/scheduler/dispatch/index.mjs stuck --threshold-min 15",
|
|
350
|
+
"delivery_mode": "announce",
|
|
351
|
+
"delivery_channel": "telegram",
|
|
352
|
+
"delivery_to": "YOUR_CHAT_ID"
|
|
353
|
+
}'
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
## Migration from Scheduler-DB Version
|
|
359
|
+
|
|
360
|
+
If upgrading from the scheduler-DB version:
|
|
361
|
+
|
|
362
|
+
1. Replace `index.mjs` with the version shipped alongside this README
|
|
363
|
+
2. `hooks.mjs` is unchanged (no DB imports)
|
|
364
|
+
3. `labels.json` is created automatically on first `enqueue`
|
|
365
|
+
4. Old scheduler jobs for dispatch tasks can be removed
|
|
366
|
+
5. The scheduler DB is no longer needed for dispatch
|
|
367
|
+
|
|
368
|
+
The CLI flags are identical — existing scripts/agents calling dispatch
|
|
369
|
+
don't need changes (except `--mode auto` is gone; use `fresh` or `reuse`).
|
|
370
|
+
|
|
371
|
+
New additions: `steer` subcommand (alias for `send`), `list` subcommand,
|
|
372
|
+
`--model` flag on `enqueue`.
|