openclaw-scheduler 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/AGENTS.md +302 -0
  2. package/BEST-PRACTICES.md +506 -0
  3. package/CHANGELOG.md +82 -0
  4. package/CODE_OF_CONDUCT.md +22 -0
  5. package/CONTEXT.md +26 -0
  6. package/CONTRIBUTING.md +73 -0
  7. package/IMPLEMENTATION_SPEC.md +170 -0
  8. package/INSTALL-ADDITIONAL-HOST.md +333 -0
  9. package/INSTALL-LINUX.md +419 -0
  10. package/INSTALL-WINDOWS.md +305 -0
  11. package/INSTALL.md +364 -0
  12. package/JOB-QUICK-REF.md +222 -0
  13. package/LICENSE +21 -0
  14. package/QUICK-START.md +256 -0
  15. package/README.md +2170 -0
  16. package/SECURITY.md +34 -0
  17. package/UNINSTALL.md +129 -0
  18. package/UPGRADING.md +436 -0
  19. package/agents.js +67 -0
  20. package/approval.js +107 -0
  21. package/backup.js +390 -0
  22. package/bin/openclaw-scheduler.js +138 -0
  23. package/cli.js +1083 -0
  24. package/db.js +122 -0
  25. package/dispatch/529-recovery.mjs +204 -0
  26. package/dispatch/README.md +372 -0
  27. package/dispatch/config.example.json +24 -0
  28. package/dispatch/deliver-watcher.sh +57 -0
  29. package/dispatch/hooks.mjs +171 -0
  30. package/dispatch/index.mjs +1836 -0
  31. package/dispatch/watcher.mjs +1396 -0
  32. package/dispatch-queue.js +112 -0
  33. package/dispatcher-approvals.js +96 -0
  34. package/dispatcher-delivery.js +43 -0
  35. package/dispatcher-maintenance.js +242 -0
  36. package/dispatcher-shell.js +29 -0
  37. package/dispatcher-strategies.js +1280 -0
  38. package/dispatcher-utils.js +81 -0
  39. package/dispatcher.js +855 -0
  40. package/docs/adr-schedule-ownership.md +73 -0
  41. package/docs/gateway-contract.md +904 -0
  42. package/docs/plans/2026-03-09-fix-typescript-types.md +91 -0
  43. package/docs/plans/2026-03-09-test-coverage-gaps.md +83 -0
  44. package/docs/plans/2026-03-10-dispatcher-refactor.md +801 -0
  45. package/docs/trust-architecture.md +266 -0
  46. package/gateway.js +473 -0
  47. package/idempotency.js +119 -0
  48. package/index.d.ts +864 -0
  49. package/index.js +17 -0
  50. package/jobs.js +1224 -0
  51. package/messages.js +357 -0
  52. package/migrate-consolidate.js +694 -0
  53. package/migrate.js +125 -0
  54. package/package.json +130 -0
  55. package/paths.js +79 -0
  56. package/prompt-context.js +94 -0
  57. package/retrieval.js +176 -0
  58. package/runs.js +270 -0
  59. package/scheduler-schema.js +101 -0
  60. package/schema.sql +480 -0
  61. package/scripts/dispatch-cli-utils.mjs +65 -0
  62. package/scripts/inbox-consumer.mjs +288 -0
  63. package/scripts/stuck-detector.sh +18 -0
  64. package/scripts/stuck-run-detector.mjs +333 -0
  65. package/scripts/telegram-webhook-check.mjs +238 -0
  66. package/setup.mjs +724 -0
  67. package/shell-result.js +214 -0
  68. package/task-tracker.js +300 -0
  69. package/team-adapter.js +335 -0
  70. package/v02-runtime.js +599 -0
@@ -0,0 +1,1280 @@
1
+ // dispatcher-strategies.js
2
+ // Strategy pattern for dispatchJob: each execution target returns a DispatchResult,
3
+ // and finalizeDispatch processes it uniformly.
4
+
5
/**
 * DispatchResult shape (returned by every strategy):
 * {
 *   status: 'ok' | 'error' | 'skipped',
 *   summary: string,
 *   content: string,                  // for delivery + trigger condition eval
 *   errorMessage: string | null,
 *   runFinishFields: object,          // extra fields for finishRun (shell_exit_code, etc.)
 *   deliveryOverride: string | null,  // override delivery content (null = use content)
 *   skipDelivery: boolean,            // suppress delivery entirely
 *   skipJobUpdate: boolean,           // strategy handled job state itself
 *   skipChildren: boolean,            // don't fire triggered children
 *   skipDequeue: boolean,             // don't drain overlap queue
 *   skipAgentCleanup: boolean,        // don't reset the agent to 'idle' during finalize
 *   idemAction: 'keep' | 'release' | 'noop', // what to do with idempotency key
 *   retryFiresChildren: boolean,      // whether retry path fires triggered children
 *   earlyReturn: boolean,             // finalize should skip everything (strategy fully handled it)
 * }
 */

/**
 * Build a DispatchResult populated with the default value of every field.
 *
 * Each call returns a new object (with its own `runFinishFields` object), so
 * callers may mutate the result freely. Note the defaults that are not simply
 * "off": `status` is 'ok', `idemAction` is 'noop', and `skipAgentCleanup` is
 * true -- a strategy has to set it to false to have the agent reset to idle.
 *
 * @returns {object} A fresh DispatchResult with default values.
 */
export function makeDefaultResult() {
  return {
    status: 'ok',
    summary: '',
    content: '',
    errorMessage: null,
    runFinishFields: {},
    deliveryOverride: null,
    skipDelivery: false,
    skipJobUpdate: false,
    skipChildren: false,
    skipDequeue: false,
    skipAgentCleanup: true,
    idemAction: 'noop',
    retryFiresChildren: false,
    earlyReturn: false,
  };
}
42
+
43
/**
 * Parse a JSON string without ever throwing.
 *
 * @param {string|null|undefined} str - JSON text; null, undefined and '' mean "no value".
 * @returns {*} The parsed value, or null when the input is empty or is not valid JSON.
 */
function safeParse(str) {
  const isEmpty = str === null || str === undefined || str === '';
  if (isEmpty) return null;

  let parsed = null;
  try {
    parsed = JSON.parse(str);
  } catch {
    // Malformed JSON is reported as null instead of propagating the error.
  }
  return parsed;
}
52
+
53
/**
 * Find the trust level carried by an identity-like object.
 *
 * Candidate locations are checked in a fixed precedence order: the top-level
 * `trust_level`, then `trust`, then `session.trust`, then `raw`. Within a
 * `trust` object `effective_level` wins over `level`. The first truthy value
 * found is returned.
 *
 * @param {object|null|undefined} identity - Identity blob or resolved identity.
 * @returns {*} The first truthy trust level, or null when `identity` is not an
 *   object or no candidate location holds a truthy value.
 */
function getIdentityTrustLevel(identity) {
  const isObjectLike = Boolean(identity) && typeof identity === 'object';
  if (!isObjectLike) return null;

  // Ordered by precedence; evaluated lazily so lookup stops at the first hit.
  const lookups = [
    () => identity.trust_level,
    () => identity.trust?.effective_level,
    () => identity.trust?.level,
    () => identity.session?.trust?.effective_level,
    () => identity.session?.trust?.level,
    () => identity.raw?.trust_level,
    () => identity.raw?.trust?.effective_level,
    () => identity.raw?.trust?.level,
  ];

  for (const lookup of lookups) {
    const level = lookup();
    if (level) return level;
  }
  return null;
}
65
+
66
/**
 * Determine the trust level declared for a job.
 *
 * The identity blob takes precedence: the caller-supplied `parsedIdentity` is
 * used when truthy, otherwise `job.identity` is parsed with safeParse. If the
 * blob yields no trust level, `job.identity_trust_level` is the fallback.
 *
 * @param {object|null|undefined} job - Job record.
 * @param {object|null} [parsedIdentity=null] - Already-parsed `job.identity`, if available.
 * @returns {*} The trust level, or null when none is declared.
 */
function getJobTrustLevel(job, parsedIdentity = null) {
  const identityBlob = parsedIdentity ? parsedIdentity : safeParse(job?.identity);

  const fromIdentity = getIdentityTrustLevel(identityBlob);
  if (fromIdentity) return fromIdentity;

  const fromColumn = job?.identity_trust_level;
  return fromColumn ? fromColumn : null;
}
70
+
71
/**
 * Report whether a job declares any identity-related field.
 *
 * @param {object|null|undefined} job - Job record.
 * @returns {boolean} True when at least one identity field is neither null
 *   nor undefined; false for a falsy `job`.
 */
function hasIdentityDeclaration(job) {
  if (!job) return false;

  const identityFields = [
    'identity',
    'identity_ref',
    'identity_principal',
    'identity_run_as',
    'identity_attestation',
    'identity_subject_kind',
    'identity_subject_principal',
    'identity_trust_level',
    'identity_delegation_mode',
  ];

  return identityFields.some((field) => job[field] !== null && job[field] !== undefined);
}
83
+
84
/**
 * Produce a copy of the v0.2 outcomes that is safe to write to the database.
 *
 * When `outcomes.identity_resolved.session.credentials` is truthy, the session
 * is replaced by a redacted version: the identity provider's describeSession()
 * output when the provider is available and exposes that method, otherwise a
 * copy of the session with the `credentials` key removed. A describeSession()
 * that throws also falls back to removing the key. The input object is never
 * mutated; `outcomes`, `identity_resolved` and `session` are shallow-copied.
 *
 * @param {object|null|undefined} outcomes - v0.2 outcomes collected during dispatch.
 * @param {object} [deps] - Injected dependencies; only `getIdentityProvider` is used.
 * @returns {object|null|undefined} `outcomes` itself when there are no session
 *   credentials to redact, otherwise the redacted copy.
 */
export function redactOutcomesForPersistence(outcomes, deps) {
  const resolved = outcomes?.identity_resolved;
  if (!resolved?.session?.credentials) return outcomes;

  // Work on a private copy so neither the provider nor the fallback can touch
  // the live session that the run is still using.
  const sessionCopy = { ...resolved.session };
  const withoutCredentials = () => {
    delete sessionCopy.credentials;
    return sessionCopy;
  };

  const providerName = resolved.provider;
  const provider = providerName && deps?.getIdentityProvider?.(providerName);
  const canDescribe = provider && typeof provider.describeSession === 'function';

  let persistedSession;
  if (canDescribe) {
    try {
      persistedSession = provider.describeSession(sessionCopy);
    } catch {
      // Provider redaction failed: fall back to dropping the credentials key.
      persistedSession = withoutCredentials();
    }
  } else {
    persistedSession = withoutCredentials();
  }

  return {
    ...outcomes,
    identity_resolved: { ...resolved, session: persistedSession },
  };
}
112
+
113
/**
 * Abort a dispatch whose run record has already been created.
 *
 * In order: finishes the run as 'error' (with `summary` as both summary and
 * error message), persists the redacted v0.2 outcomes, releases the
 * idempotency key if one was claimed, records the error on the job, marks the
 * dispatch record 'done' if there is one, optionally fires triggered children,
 * and drains the overlap queue.
 *
 * @param {object} job - The job record.
 * @param {object} run - The run record created for this dispatch.
 * @param {string} summary - Human-readable abort reason.
 * @param {object} outcomes - v0.2 outcomes gathered so far.
 * @param {object} state - `{ dispatchRecord, idemKey }` for the in-flight dispatch.
 * @param {object} deps - Injected dependencies.
 * @param {object} [opts] - `{ skipChildren }`; when truthy, child jobs are not triggered.
 * @returns {null} Always null.
 */
function abortPreparedRun(job, run, summary, outcomes, state, deps, opts = {}) {
  const {
    finishRun, persistV02Outcomes, releaseIdempotencyKey, updateJobAfterRun,
    setDispatchStatus, handleTriggeredChildren, dequeueJob, log,
  } = deps;
  const failure = 'error';

  finishRun(run.id, failure, { summary, error_message: summary });

  const persistable = redactOutcomesForPersistence(outcomes, deps);
  persistV02Outcomes(run.id, persistable);

  if (state.idemKey) {
    releaseIdempotencyKey(state.idemKey);
  }

  updateJobAfterRun(job, failure);

  if (state.dispatchRecord) {
    setDispatchStatus(state.dispatchRecord.id, 'done');
  }

  // Callers aborting on a security gate (identity/trust/auth/proof/credential
  // failure) pass skipChildren so that a parent which failed the gate does not
  // start downstream work that may have weaker security requirements.
  const firesChildren = !opts.skipChildren;
  if (firesChildren) {
    handleTriggeredChildren(job.id, failure, summary, run.id);
  }

  const dequeued = dequeueJob(job.id);
  if (dequeued) {
    log('info', `Dequeued pending dispatch for ${job.name}`);
  }

  return null;
}
138
+
139
/**
 * Shared post-execution sequence applied to the DispatchResult of any strategy.
 *
 * Unless `result.earlyReturn` is set (in which case nothing happens), the steps
 * run in this order: finish the run; persist v0.2 evidence/outcomes; run
 * provider cleanup; settle the idempotency key; reset the agent status;
 * deliver output; schedule a retry on error; update job state; complete the
 * dispatch record; fire triggered children; drain the overlap queue. When a
 * retry is actually scheduled, the function returns right after the retry
 * bookkeeping and the job-state/children steps of the normal path are skipped.
 *
 * @param {object} job - The job record
 * @param {object} ctx - DispatchContext from prepareDispatch
 * @param {object} result - DispatchResult from the strategy
 * @param {object} deps - Injected dependencies
 * @returns {Promise<void>}
 */
export async function finalizeDispatch(job, ctx, result, deps) {
  const {
    finishRun, updateIdempotencyResultHash, releaseIdempotencyKey,
    setAgentStatus, handleDelivery, shouldRetry, scheduleRetry,
    getDb, updateJobAfterRun, setDispatchStatus, handleTriggeredChildren,
    dequeueJob, log,
  } = deps;

  if (result.earlyReturn) return;

  const runId = ctx.run.id;

  // Shared by the retry path and the normal path.
  const markDispatchDone = () => {
    if (ctx.dispatchRecord) setDispatchStatus(ctx.dispatchRecord.id, 'done');
  };
  const drainOverlapQueue = () => {
    if (!result.skipDequeue && dequeueJob(job.id)) {
      log('info', `Dequeued pending dispatch for ${job.name}`);
    }
  };

  // 1. Finish the run
  finishRun(runId, result.status, {
    summary: result.summary,
    error_message: result.errorMessage,
    ...result.runFinishFields,
  });

  // 1b. v0.2 evidence and outcome persistence
  const outcomes = ctx.v02Outcomes;
  if (outcomes) {
    const { generateEvidence, persistV02Outcomes } = deps;
    const wantsEvidence = job.evidence || job.evidence_ref;
    if (wantsEvidence) {
      const evidence = generateEvidence(job, { id: runId, status: result.status }, outcomes);
      if (evidence) outcomes.evidence_record = evidence;
    }
    persistV02Outcomes(runId, redactOutcomesForPersistence(outcomes, deps));
  }

  // 1c. Provider cleanup (best effort: a failure is logged, never rethrown)
  if (ctx.materializationCleanup) {
    try {
      const { provider, cleanupState } = ctx.materializationCleanup;
      if (typeof provider.cleanup === 'function') {
        await provider.cleanup(cleanupState, { env: process.env, cwd: process.cwd() });
      }
    } catch (err) {
      log('warn', `Provider cleanup failed for ${job.name}: ${err.message}`, { jobId: job.id });
    }
  }

  // 2. Idempotency key management
  if (ctx.idemKey) {
    switch (result.idemAction) {
      case 'keep':
        updateIdempotencyResultHash(ctx.idemKey, result.content);
        break;
      case 'release':
        releaseIdempotencyKey(ctx.idemKey);
        break;
      default:
        // 'noop' -- leave key claimed without writing result hash
        break;
    }
  }

  // 3. Agent status cleanup (only for strategies that set busy)
  if (job.agent_id && !result.skipAgentCleanup) {
    setAgentStatus(job.agent_id, 'idle', null);
  }

  // 4. Delivery
  if (!result.skipDelivery) {
    const deliveryContent = result.deliveryOverride ?? result.content;
    const announces = job.delivery_mode === 'announce' || job.delivery_mode === 'announce-always';

    if (announces && deliveryContent?.trim()) {
      let message;
      if (result.deliveryOverride) {
        message = result.deliveryOverride;
      } else if (result.status === 'error') {
        const willRetry = (job.max_retries ?? 0) > 0 && (ctx.run.retry_count || 0) < job.max_retries;
        const retryLabel = willRetry ? 'will retry' : 'no retries configured';
        message = `\u26a0\ufe0f Job soft-failed (${retryLabel}): ${job.name}\n\n${deliveryContent}`;
      } else {
        message = deliveryContent;
      }
      await handleDelivery(job, message);
    }
  }

  // 5. Retry on error
  if (result.status === 'error' && shouldRetry(job, runId)) {
    const retry = scheduleRetry(job, runId);
    if (retry.dispatch) {
      log('info', `Scheduling retry ${retry.retryCount}/${job.max_retries} in ${retry.delaySec}s`, {
        jobId: job.id, runId,
      });
      getDb().prepare('UPDATE runs SET retry_count = ? WHERE id = ?').run(retry.retryCount, runId);
      markDispatchDone();
      drainOverlapQueue();
      if (result.retryFiresChildren && !result.skipChildren) {
        handleTriggeredChildren(job.id, 'error', result.content, runId, ' on soft failure');
      }
      log('info', `Failed: ${job.name} (retry scheduled)`, { runId });
      return; // retry path handles everything
    }
    log('warn', `Retry skipped for ${job.name} -- dispatch backlog limit reached`, {
      jobId: job.id, runId,
      maxQueuedDispatches: job.max_queued_dispatches || 25,
    });
    // No retry was queued, so continue with the normal completion steps below.
  }

  // 6. Update job state
  if (!result.skipJobUpdate) {
    updateJobAfterRun(job, result.status);
  }

  // 7. Complete dispatch
  markDispatchDone();

  // 8. Triggered children
  if (!result.skipChildren) {
    handleTriggeredChildren(job.id, result.status, result.content, runId);
  }

  // 9. Dequeue overlap
  drainOverlapQueue();
}
264
+
265
+ // -- Phase 1: Guards + run creation --------------------------
266
+
267
+ /**
268
+ * DispatchContext shape (returned by prepareDispatch):
269
+ * {
270
+ * dispatchRecord: object | null,
271
+ * idemKey: string | null,
272
+ * run: object, // the created run record
273
+ * retryCount: number,
274
+ * dispatchKind: string | null,
275
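+ * isChainDispatch: boolean,
+ * v02Outcomes: object, // read by finalizeDispatch for evidence + outcome persistence (confirm against prepareDispatch's return)
+ * materializationCleanup: object | null, // { provider, cleanupState }; read by finalizeDispatch for provider cleanup (confirm against prepareDispatch's return)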
276
+ * }
277
+ */
278
+
279
+ /**
280
+ * Phase 1: Guards + run creation. Returns DispatchContext or null (guard rejected).
281
+ *
282
+ * @param {object} job
283
+ * @param {object} opts - { approvalBypass, dispatchRecord }
284
+ * @param {object} deps - Injected dependencies
285
+ * @returns {object|null}
286
+ */
287
+ export async function prepareDispatch(job, opts, deps) {
288
+ const {
289
+ claimDispatch, releaseDispatch, setDispatchStatus,
290
+ countPendingApprovalsForJob, getPendingApproval,
291
+ createApproval, createRun, getRun,
292
+ hasRunningRunForPool, hasRunningRun,
293
+ enqueueJob, getDispatchBacklogCount,
294
+ generateIdempotencyKey, generateChainIdempotencyKey,
295
+ generateRunNowIdempotencyKey, claimIdempotencyKey,
296
+ finishRun, getDb,
297
+ sqliteNow, adaptiveDeferralMs,
298
+ handleDelivery, advanceNextRun,
299
+ TICK_INTERVAL_MS,
300
+ log,
301
+ } = deps;
302
+
303
+ const approvalBypass = opts.approvalBypass === true;
304
+ let dispatchRecord = opts.dispatchRecord || null;
305
+
306
+ // Claim pending dispatch
307
+ if (dispatchRecord && dispatchRecord.status === 'pending') {
308
+ dispatchRecord = claimDispatch(dispatchRecord.id);
309
+ if (!dispatchRecord) {
310
+ log('debug', `Skipping claimed dispatch for ${job.name}`, { dispatchId: opts.dispatchRecord.id });
311
+ return null;
312
+ }
313
+ }
314
+
315
+ const completeCurrentDispatch = (status = 'done') => {
316
+ if (!dispatchRecord) return null;
317
+ return setDispatchStatus(dispatchRecord.id, status);
318
+ };
319
+
320
+ const dispatchKind = dispatchRecord?.dispatch_kind || null;
321
+ const isChainDispatch = dispatchKind === 'chain';
322
+ const dispatchBacklogDepth = getDispatchBacklogCount(job.id);
323
+
324
+ // HITL approval gate
325
+ if (job.approval_required && isChainDispatch && !approvalBypass) {
326
+ const pendingApprovalCount = countPendingApprovalsForJob(job.id);
327
+ if (pendingApprovalCount >= (job.max_pending_approvals || 10)) {
328
+ completeCurrentDispatch('cancelled');
329
+ log('warn', `Approval backlog limit reached for ${job.name}`, {
330
+ jobId: job.id,
331
+ pendingApprovals: pendingApprovalCount,
332
+ maxPendingApprovals: job.max_pending_approvals || 10,
333
+ });
334
+ return null;
335
+ }
336
+ const existing = getPendingApproval(job.id);
337
+ if (existing) {
338
+ releaseDispatch(dispatchRecord.id, sqliteNow(adaptiveDeferralMs(dispatchBacklogDepth)));
339
+ log('debug', `Skipping ${job.name} -- approval already pending`, {
340
+ approvalId: existing.id,
341
+ dispatchId: dispatchRecord?.id || null,
342
+ deferredMs: adaptiveDeferralMs(dispatchBacklogDepth),
343
+ });
344
+ return null;
345
+ }
346
+ const run = createRun(job.id, {
347
+ run_timeout_ms: job.run_timeout_ms,
348
+ status: 'awaiting_approval',
349
+ dispatch_queue_id: dispatchRecord?.id || null,
350
+ triggered_by_run: dispatchRecord?.source_run_id || null,
351
+ retry_of: dispatchRecord?.retry_of_run_id || null,
352
+ });
353
+ const approval = createApproval(job.id, run.id, dispatchRecord?.id || null);
354
+ if (dispatchRecord) setDispatchStatus(dispatchRecord.id, 'awaiting_approval');
355
+ log('info', `Approval required for ${job.name} -- awaiting operator`, { approvalId: approval.id, runId: run.id });
356
+ const msg = `\u26a0\ufe0f Job '${job.name}' requires approval.\nApprove: openclaw-scheduler jobs approve ${job.id}\nReject: openclaw-scheduler jobs reject ${job.id}`;
357
+ await handleDelivery({ ...job, delivery_mode: 'announce-always' }, msg);
358
+ return null;
359
+ }
360
+
361
+ // Resource pool concurrency
362
+ if (job.resource_pool && hasRunningRunForPool(job.resource_pool)) {
363
+ log('info', `Skipping ${job.name} -- resource pool '${job.resource_pool}' busy`, { jobId: job.id, pool: job.resource_pool });
364
+ if (dispatchRecord) {
365
+ releaseDispatch(dispatchRecord.id, sqliteNow(TICK_INTERVAL_MS));
366
+ } else {
367
+ advanceNextRun(job);
368
+ }
369
+ return null;
370
+ }
371
+
372
+ // Overlap control
373
+ if (hasRunningRun(job.id)) {
374
+ if (job.overlap_policy === 'skip') {
375
+ log('info', `Skipping ${job.name} -- previous run still active`, { jobId: job.id });
376
+ if (dispatchRecord) {
377
+ completeCurrentDispatch('cancelled');
378
+ } else {
379
+ advanceNextRun(job);
380
+ }
381
+ return null;
382
+ }
383
+ if (job.overlap_policy === 'queue') {
384
+ const queueResult = enqueueJob(job.id);
385
+ if (!queueResult.queued) {
386
+ log('warn', `Queue limit reached for ${job.name} -- dropping overlap dispatch`, {
387
+ jobId: job.id,
388
+ queuedCount: queueResult.queued_count,
389
+ maxQueuedDispatches: job.max_queued_dispatches || 25,
390
+ });
391
+ if (dispatchRecord) {
392
+ completeCurrentDispatch('cancelled');
393
+ } else {
394
+ advanceNextRun(job);
395
+ }
396
+ return null;
397
+ }
398
+ log('info', `Queueing ${job.name} -- previous run still active`, {
399
+ jobId: job.id,
400
+ queuedCount: queueResult.queued_count,
401
+ });
402
+ if (dispatchRecord) {
403
+ completeCurrentDispatch('done');
404
+ } else {
405
+ advanceNextRun(job);
406
+ }
407
+ return null;
408
+ }
409
+ // 'allow' falls through
410
+ }
411
+
412
+ // Idempotency key generation
413
+ const scheduledTime = job.schedule_at || job.next_run_at;
414
+ let idemKey;
415
+ if (dispatchKind === 'chain') {
416
+ idemKey = generateChainIdempotencyKey(dispatchRecord.source_run_id || dispatchRecord.id, job.id);
417
+ } else if (dispatchKind === 'manual') {
418
+ idemKey = generateRunNowIdempotencyKey(job.id);
419
+ } else if (dispatchKind === 'retry') {
420
+ idemKey = generateChainIdempotencyKey(dispatchRecord.retry_of_run_id || dispatchRecord.id, job.id);
421
+ } else {
422
+ idemKey = generateIdempotencyKey(job, scheduledTime);
423
+ }
424
+
425
+ // Idempotency dedup
426
+ if (idemKey) {
427
+ const existing = getDb().prepare("SELECT * FROM idempotency_ledger WHERE key = ? AND status = 'claimed'").get(idemKey);
428
+ if (existing) {
429
+ log('info', `Idempotency skip: ${job.name} (key ${idemKey.slice(0,8)}... already claimed by run ${existing.run_id.slice(0,8)}...)`);
430
+ if (dispatchRecord) {
431
+ completeCurrentDispatch('done');
432
+ } else {
433
+ advanceNextRun(job);
434
+ }
435
+ return null;
436
+ }
437
+ }
438
+
439
+ log('info', `Dispatching: ${job.name}`, { jobId: job.id, target: job.session_target });
440
+
441
+ const retryCount = dispatchKind === 'retry' && dispatchRecord?.retry_of_run_id
442
+ ? (getRun(dispatchRecord.retry_of_run_id)?.retry_count || 0)
443
+ : 0;
444
+
445
+ const run = createRun(job.id, {
446
+ run_timeout_ms: job.run_timeout_ms,
447
+ idempotency_key: idemKey,
448
+ retry_count: retryCount,
449
+ dispatch_queue_id: dispatchRecord?.id || null,
450
+ triggered_by_run: dispatchRecord?.source_run_id || null,
451
+ retry_of: dispatchRecord?.retry_of_run_id || null,
452
+ });
453
+
454
+ // Claim idempotency key
455
+ if (idemKey) {
456
+ const expiresAt = job.delete_after_run
457
+ ? sqliteNow(24 * 60 * 60 * 1000)
458
+ : sqliteNow(7 * 24 * 60 * 60 * 1000);
459
+ const claimed = claimIdempotencyKey(idemKey, job.id, run.id, expiresAt);
460
+ if (!claimed) {
461
+ log('warn', `Idempotency race: ${job.name} key ${idemKey.slice(0,8)}... claimed by concurrent dispatch`);
462
+ finishRun(run.id, 'skipped', { summary: 'Idempotency key already claimed (race)' });
463
+ if (dispatchRecord) {
464
+ completeCurrentDispatch('done');
465
+ } else {
466
+ advanceNextRun(job);
467
+ }
468
+ return null;
469
+ }
470
+ }
471
+
472
+ // v0.2 runtime evaluation
473
+ const {
474
+ resolveIdentity, evaluateTrust, verifyAuthorizationProof,
475
+ evaluateAuthorization, summarizeCredentialHandoff,
476
+ } = deps;
477
+
478
+ // Build provider context for v0.2 runtime calls
479
+ const providerCtx = {
480
+ getIdentityProvider: deps.getIdentityProvider,
481
+ getAuthorizationProvider: deps.getAuthorizationProvider,
482
+ getProofVerifier: deps.getProofVerifier,
483
+ env: process.env,
484
+ cwd: process.cwd(),
485
+ };
486
+
487
+ const v02Outcomes = {};
488
+ const hasV02Identity = hasIdentityDeclaration(job);
489
+ const hasV02Contract = job.contract_required_trust_level;
490
+ const needsAuthorization = job.authorization || job.authorization_ref;
491
+ const shouldResolveIdentity = hasV02Identity || hasV02Contract || needsAuthorization;
492
+
493
+ if (shouldResolveIdentity) {
494
+ v02Outcomes.identity_resolved = await resolveIdentity(job, providerCtx);
495
+ }
496
+
497
+ if (hasV02Identity) {
498
+ const handoff = summarizeCredentialHandoff(job);
499
+ if (handoff) v02Outcomes.credential_handoff_summary = handoff;
500
+ }
501
+
502
+ const hasDeclaredCredentialHandoff = v02Outcomes.credential_handoff_summary
503
+ && (v02Outcomes.credential_handoff_summary.mode != null
504
+ || v02Outcomes.credential_handoff_summary.bindings_count > 0);
505
+ if (hasDeclaredCredentialHandoff && job.session_target !== 'shell') {
506
+ return abortPreparedRun(
507
+ job,
508
+ run,
509
+ 'Credential handoff presentation is only supported for shell jobs',
510
+ v02Outcomes,
511
+ { dispatchRecord, idemKey },
512
+ deps,
513
+ { skipChildren: true },
514
+ );
515
+ }
516
+
517
+ // Child credential policy enforcement.
518
+ // Apply this BEFORE trust/auth evaluation so later gates see the effective
519
+ // identity that will actually be materialized for the run. The policy can
520
+ // narrow (downscope) or remove (none) credentials, and it may also inherit
521
+ // the parent's auth_profile for downstream gateway calls.
522
+ if (job.parent_id) {
523
+ const { getDb: getDatabase } = deps;
524
+ const parentJob = getDatabase().prepare(
525
+ 'SELECT id, child_credential_policy, identity, identity_trust_level, auth_profile FROM jobs WHERE id = ?'
526
+ ).get(job.parent_id);
527
+
528
+ if (parentJob) {
529
+ const effectivePolicy = job.child_credential_policy
530
+ || parentJob.child_credential_policy
531
+ || 'none';
532
+ const parentIdentityBlob = safeParse(parentJob.identity);
533
+ const lastSuccessfulParentRun = (effectivePolicy === 'downscope' || effectivePolicy === 'independent')
534
+ ? getDatabase().prepare(
535
+ 'SELECT identity_resolved FROM runs WHERE job_id = ? AND status = ? ORDER BY started_at DESC LIMIT 1'
536
+ ).get(parentJob.id, 'ok')
537
+ : null;
538
+ const parentResolvedIdentity = lastSuccessfulParentRun?.identity_resolved
539
+ ? safeParse(lastSuccessfulParentRun.identity_resolved)
540
+ : null;
541
+
542
+ if (effectivePolicy === 'none') {
543
+ // No credentials from parent; suppress any identity the child resolved on its own
544
+ v02Outcomes.identity_resolved = null;
545
+ } else if (effectivePolicy === 'inherit' && parentJob.auth_profile) {
546
+ // Inherit parent's auth profile. Store in v02Outcomes rather than
547
+ // mutating the job DB record, which could leak to downstream writes.
548
+ v02Outcomes.effective_auth_profile = parentJob.auth_profile;
549
+ } else if (effectivePolicy === 'downscope') {
550
+ // Downscope: resolve narrower credentials via provider.
551
+ // Fail closed on every path -- if downscope is declared, we must
552
+ // either produce a downscoped session or abort dispatch.
553
+ const providerName = parentIdentityBlob?.provider || parentIdentityBlob?.auth?.provider;
554
+ const provider = deps.getIdentityProvider?.(providerName);
555
+ let downscopeApplied = false;
556
+
557
+ if (provider && typeof provider.prepareHandoff === 'function') {
558
+ // Get parent session from last run or re-resolve
559
+ let parentSession = parentResolvedIdentity?.session || null;
560
+
561
+ if (!parentSession && provider.resolveSession) {
562
+ // Fallback: re-resolve parent identity
563
+ try {
564
+ const parentScope = parentIdentityBlob?.scope || parentIdentityBlob?.auth?.scopes?.[0] || null;
565
+ const reResolved = await provider.resolveSession(
566
+ { profile: parentIdentityBlob, instanceId: parentJob.id, scope: parentScope },
567
+ { env: process.env, cwd: process.cwd() }
568
+ );
569
+ if (reResolved.ok) parentSession = reResolved.session;
570
+ } catch (resolveErr) {
571
+ log('warn', `Downscope parent re-resolve failed for ${job.name}: ${resolveErr.message}`, { jobId: job.id });
572
+ }
573
+ }
574
+
575
+ if (parentSession) {
576
+ const childIdentityBlob = safeParse(job.identity) || {};
577
+ const childScope = childIdentityBlob?.scope || childIdentityBlob?.auth?.scopes?.[0] || null;
578
+
579
+ try {
580
+ const handoffResult = await provider.prepareHandoff(
581
+ parentSession,
582
+ { target_scope: childScope, parent_profile: parentIdentityBlob },
583
+ { env: process.env, cwd: process.cwd() }
584
+ );
585
+
586
+ if (handoffResult.prepared) {
587
+ // Verify handoff actually downscoped: child trust must not
588
+ // exceed parent. A provider that returns an elevated session
589
+ // violates the downscope contract.
590
+ const parentTrustLevel = getIdentityTrustLevel(parentResolvedIdentity)
591
+ || getIdentityTrustLevel({ session: parentSession })
592
+ || getJobTrustLevel(parentJob, parentIdentityBlob);
593
+ const childTrustLevel = getIdentityTrustLevel({ session: handoffResult.session });
594
+ const { compareTrustLevels } = deps;
595
+ if (parentTrustLevel && childTrustLevel && compareTrustLevels(childTrustLevel, parentTrustLevel) > 0) {
596
+ log('warn', `Downscope handoff elevated trust from "${parentTrustLevel}" to "${childTrustLevel}" for ${job.name}`, { jobId: job.id });
597
+ // Do not set downscopeApplied -- will abort below
598
+ } else {
599
+ // Override the identity resolution with the handoff session
600
+ v02Outcomes.identity_resolved = {
601
+ provider: providerName,
602
+ session: handoffResult.session,
603
+ source: 'provider',
604
+ subject_kind: handoffResult.session?.subject?.kind || 'unknown',
605
+ principal: handoffResult.session?.subject?.principal || null,
606
+ trust_level: childTrustLevel,
607
+ delegation_mode: null,
608
+ raw: childIdentityBlob,
609
+ };
610
+ downscopeApplied = true;
611
+ }
612
+ }
613
+ } catch (err) {
614
+ log('warn', `Downscope handoff error for ${job.name}: ${err.message}`, { jobId: job.id });
615
+ }
616
+ }
617
+ }
618
+
619
+ if (!downscopeApplied) {
620
+ const reason = !provider
621
+ ? `identity provider ${providerName || '(none)'} not loaded`
622
+ : typeof provider.prepareHandoff !== 'function'
623
+ ? `provider ${providerName} does not support prepareHandoff`
624
+ : 'parent session unavailable or handoff did not produce a downscoped session';
625
+ return abortPreparedRun(
626
+ job,
627
+ run,
628
+ `Downscope credential policy failed: ${reason}`,
629
+ v02Outcomes,
630
+ { dispatchRecord, idemKey },
631
+ deps,
632
+ { skipChildren: true },
633
+ );
634
+ }
635
+ } else if (effectivePolicy === 'independent') {
636
+ // Child uses its own resolved identity, but cannot exceed the parent's
637
+ // trust level. Without this cap, a child could declare a higher trust
638
+ // level than the parent and bypass the parent's authorization scope.
639
+ const parentTrustLevel = getIdentityTrustLevel(parentResolvedIdentity)
640
+ || getJobTrustLevel(parentJob, parentIdentityBlob);
641
+ const childTrustLevel = v02Outcomes.identity_resolved?.trust_level || null;
642
+ if (parentTrustLevel && childTrustLevel) {
643
+ const { compareTrustLevels } = deps;
644
+ if (compareTrustLevels(childTrustLevel, parentTrustLevel) > 0) {
645
+ return abortPreparedRun(
646
+ job,
647
+ run,
648
+ `Independent child trust level "${childTrustLevel}" exceeds parent trust level "${parentTrustLevel}"`,
649
+ v02Outcomes,
650
+ { dispatchRecord, idemKey },
651
+ deps,
652
+ { skipChildren: true },
653
+ );
654
+ }
655
+ }
656
+ }
657
+ }
658
+ }
659
+
660
+ if (v02Outcomes.identity_resolved?.source === 'provider-error') {
661
+ return abortPreparedRun(
662
+ job,
663
+ run,
664
+ 'Identity resolution failed: ' + (v02Outcomes.identity_resolved.error || 'provider error'),
665
+ v02Outcomes,
666
+ { dispatchRecord, idemKey },
667
+ deps,
668
+ { skipChildren: true },
669
+ );
670
+ }
671
+
672
+ if (hasV02Identity || hasV02Contract || v02Outcomes.identity_resolved != null) {
673
+ v02Outcomes.trust_evaluation = evaluateTrust(job, v02Outcomes.identity_resolved);
674
+ if (v02Outcomes.trust_evaluation?.decision === 'warn') {
675
+ log('warn', `Trust evaluation warning for ${job.name}: ${v02Outcomes.trust_evaluation.reason}`, {
676
+ jobId: job.id,
677
+ runId: run.id,
678
+ });
679
+ }
680
+ if (v02Outcomes.trust_evaluation?.decision === 'deny') {
681
+ return abortPreparedRun(
682
+ job,
683
+ run,
684
+ 'Trust enforcement blocked dispatch: ' + v02Outcomes.trust_evaluation.reason,
685
+ v02Outcomes,
686
+ { dispatchRecord, idemKey },
687
+ deps,
688
+ { skipChildren: true },
689
+ );
690
+ }
691
+ }
692
+
693
+ if (job.authorization_proof || job.authorization_proof_ref) {
694
+ v02Outcomes.authorization_proof_verification = await verifyAuthorizationProof(job, providerCtx);
695
+ if (v02Outcomes.authorization_proof_verification?.verified === false) {
696
+ const proofError = v02Outcomes.authorization_proof_verification.error || 'verification returned false';
697
+ // Proof verification failure is blocking: the job declared a proof
698
+ // requirement, so proceeding without a valid proof violates policy.
699
+ return abortPreparedRun(
700
+ job,
701
+ run,
702
+ 'Authorization proof verification failed: ' + proofError,
703
+ v02Outcomes,
704
+ { dispatchRecord, idemKey },
705
+ deps,
706
+ { skipChildren: true },
707
+ );
708
+ }
709
+ }
710
+
711
+ if (needsAuthorization) {
712
+ v02Outcomes.authorization_decision = await evaluateAuthorization(
713
+ job, v02Outcomes.identity_resolved, v02Outcomes.trust_evaluation, providerCtx
714
+ );
715
+
716
+ if (v02Outcomes.authorization_decision?.decision === 'deny') {
717
+ return abortPreparedRun(
718
+ job,
719
+ run,
720
+ 'Authorization denied: ' + v02Outcomes.authorization_decision.reason,
721
+ v02Outcomes,
722
+ { dispatchRecord, idemKey },
723
+ deps,
724
+ { skipChildren: true },
725
+ );
726
+ }
727
+ if (v02Outcomes.authorization_decision?.decision === 'escalate') {
728
+ // Escalation means the authorization provider wants a human decision.
729
+ // Abort the dispatch so the approval system (or operator) can intervene.
730
+ return abortPreparedRun(
731
+ job,
732
+ run,
733
+ 'Authorization requires escalation: ' + (v02Outcomes.authorization_decision.reason || 'provider requested escalation'),
734
+ v02Outcomes,
735
+ { dispatchRecord, idemKey },
736
+ deps,
737
+ { skipChildren: true },
738
+ );
739
+ }
740
+ if (v02Outcomes.authorization_decision?.advisory) {
741
+ log('warn', `Authorization advisory for ${job.name}: ${v02Outcomes.authorization_decision.reason}`, { jobId: job.id });
742
+ }
743
+ }
744
+
745
+ // Materialization phase
746
+ let materializedEnv = null;
747
+ let materializationCleanup = null;
748
+
749
+ if (v02Outcomes.identity_resolved?.source === 'provider' && v02Outcomes.identity_resolved.session) {
750
+ const providerName = v02Outcomes.identity_resolved.provider;
751
+ const provider = deps.getIdentityProvider?.(providerName);
752
+ const identityBlob = safeParse(job.identity) || {};
753
+ const presentation = identityBlob.presentation || {};
754
+ const hasPresentation = presentation && Object.keys(presentation).length > 0;
755
+
756
+ if (provider && typeof provider.materialize === 'function') {
757
+ try {
758
+ const matResult = await provider.materialize(
759
+ v02Outcomes.identity_resolved.session,
760
+ presentation,
761
+ { env: process.env, cwd: process.cwd() }
762
+ );
763
+ if (matResult?.materialized) {
764
+ materializedEnv = matResult.env_vars || null;
765
+ if (matResult.cleanup_required) {
766
+ materializationCleanup = {
767
+ provider,
768
+ cleanupState: {
769
+ session: v02Outcomes.identity_resolved.session,
770
+ ...matResult,
771
+ },
772
+ };
773
+ }
774
+ } else if (hasPresentation) {
775
+ // Materialization returned false but credentials were declared required
776
+ return abortPreparedRun(
777
+ job,
778
+ run,
779
+ `Credential materialization failed for provider ${providerName}: provider returned materialized=false`,
780
+ v02Outcomes,
781
+ { dispatchRecord, idemKey },
782
+ deps,
783
+ { skipChildren: true },
784
+ );
785
+ }
786
+ } catch (err) {
787
+ if (hasPresentation) {
788
+ return abortPreparedRun(
789
+ job,
790
+ run,
791
+ `Credential materialization error for provider ${providerName}: ${err.message}`,
792
+ v02Outcomes,
793
+ { dispatchRecord, idemKey },
794
+ deps,
795
+ { skipChildren: true },
796
+ );
797
+ }
798
+ // No presentation declared: provider materializes opportunistically.
799
+ // Warn and continue -- the shell job can still run without injected
800
+ // credentials when the identity blob has no presentation block.
801
+ log('warn', `Materialization failed for ${job.name}: ${err.message}`, { jobId: job.id });
802
+ }
803
+ } else if (hasPresentation) {
804
+ // Job declared credential presentation but provider has no materialize method
805
+ return abortPreparedRun(
806
+ job,
807
+ run,
808
+ `Job declares credential presentation but provider ${providerName || '(none)'} does not support materialization`,
809
+ v02Outcomes,
810
+ { dispatchRecord, idemKey },
811
+ deps,
812
+ { skipChildren: true },
813
+ );
814
+ }
815
+ }
816
+
817
+ return { dispatchRecord, idemKey, run, retryCount, dispatchKind, isChainDispatch, v02Outcomes, materializedEnv, materializationCleanup };
818
+ }
819
+
820
+ // -- Strategy: Watchdog --------------------------------------
821
+
822
/**
 * Watchdog strategy: run the job's check command and translate its exit code
 * (plus the elapsed time since `watchdog_started_at`) into one of five outcomes.
 *
 * Branches are evaluated in this order:
 *   - exit 2                  -> transient check failure, nothing is delivered
 *   - exit 0 with stdout      -> target completed; notify and optionally self-destruct
 *   - exit 1, or timed out    -> target appears stuck; send an alert
 *   - exit 0 without stdout   -> target still running, nothing is delivered
 *   - anything else           -> error result
 *
 * Notifications are sent directly to `watchdog_alert_channel` /
 * `watchdog_alert_target` via `handleDelivery`, so `skipDelivery` is set on
 * every branch that notifies (or intentionally stays silent).
 *
 * @param {object} job  Job row; reads the `watchdog_*` fields, `run_timeout_ms`, `id`, `name`.
 * @param {object} ctx  Dispatch context (not read by this strategy).
 * @param {object} deps Injected collaborators: runShellCommand, handleDelivery,
 *                      updateJob, deleteJob, log.
 * @returns {Promise<object>} The result object from makeDefaultResult(), always
 *                      with skipChildren and skipDequeue set to true.
 */
export async function executeWatchdog(job, ctx, deps) {
  const { runShellCommand, handleDelivery, updateJob, deleteJob, log } = deps;
  const result = makeDefaultResult();
  result.skipChildren = true;
  result.skipDequeue = true;

  const checkCmd = job.watchdog_check_cmd;
  if (!checkCmd) {
    result.status = 'error';
    result.errorMessage = 'Watchdog job missing watchdog_check_cmd';
    result.skipJobUpdate = false;
    return result;
  }

  // Send a message to the watchdog's own alert destination, if one is configured.
  const notify = async (message) => {
    if (!(job.watchdog_alert_channel && job.watchdog_alert_target)) return;
    await handleDelivery({
      ...job,
      delivery_mode: 'announce-always',
      delivery_channel: job.watchdog_alert_channel,
      delivery_to: job.watchdog_alert_target,
    }, message);
  };

  // The check command is capped at 60s regardless of the job's own run timeout.
  const shellExec = await runShellCommand(checkCmd, Math.min(job.run_timeout_ms || 300000, 60000));
  const exitCode = shellExec.exitCode;
  const stdout = (shellExec.stdout || '').trim();
  const stderr = (shellExec.stderr || '').trim();

  let timedOut = false;
  let elapsedMin = 0;
  if (job.watchdog_started_at && job.watchdog_timeout_min) {
    const startedAt = new Date(job.watchdog_started_at).getTime();
    if (Number.isFinite(startedAt)) {
      elapsedMin = Math.round((Date.now() - startedAt) / 60000);
      if (elapsedMin >= job.watchdog_timeout_min) timedOut = true;
    } else {
      // An unparseable timestamp would otherwise surface as "NaN minutes" in
      // the alert text; keep elapsedMin at 0 and rely on the exit code alone.
      log('warn', `Watchdog: unparseable watchdog_started_at for ${job.name}`, {
        jobId: job.id, watchdogStartedAt: job.watchdog_started_at,
      });
    }
  }

  if (exitCode === 2) {
    result.summary = `Watchdog check failed (transient): ${stderr || stdout}`;
    result.skipDelivery = true;
    log('debug', `Watchdog check transient failure: ${job.name}`, { exitCode, stderr: stderr.slice(0, 200) });

  } else if (exitCode === 0 && stdout) {
    const completionMsg = `\u2705 [watchdog] Task "${job.watchdog_target_label}" completed -- watchdog disarmed`;
    result.summary = completionMsg;
    result.content = completionMsg;
    log('info', `Watchdog: target completed: ${job.watchdog_target_label}`, { jobId: job.id });

    await notify(completionMsg);
    result.skipDelivery = true;

    if (job.watchdog_self_destruct) {
      // The job row is about to disappear, so the caller must not update it.
      result.skipJobUpdate = true;
      updateJob(job.id, { enabled: 0 });
      deleteJob(job.id);
      log('info', `Watchdog self-destructed: ${job.name}`, { jobId: job.id });
    }

  } else if (exitCode === 1 || timedOut) {
    const reason = timedOut
      ? `running for ${elapsedMin}min (threshold: ${job.watchdog_timeout_min}min)`
      : `check command reported stuck`;
    const alertMsg = [
      `\ud83d\udea8 [watchdog] Task "${job.watchdog_target_label}" appears stuck`,
      `- Dispatched: ${job.watchdog_started_at || 'unknown'}`,
      `- Running for: ${elapsedMin} minutes (threshold: ${job.watchdog_timeout_min || '?'} min)`,
      `- Reason: ${reason}`,
      `- Check: ${checkCmd.split(/\s/)[0]}${checkCmd.length > 80 ? ' [...]' : ''}`,
      stderr ? `- Error: ${stderr.slice(0, 500)}` : null,
      stdout ? `- Output: ${stdout.slice(0, 500)}` : null,
    ].filter(Boolean).join('\n');
    result.summary = `Watchdog alert fired: ${reason}`;
    result.content = alertMsg;

    log('warn', `Watchdog alert: ${job.watchdog_target_label} stuck`, {
      jobId: job.id, elapsedMin, timedOut, exitCode,
    });

    await notify(alertMsg);
    result.skipDelivery = true;

  } else if (exitCode === 0) {
    result.summary = `Watchdog check: target still running (${elapsedMin}min elapsed)`;
    result.skipDelivery = true;
    log('debug', `Watchdog: target still running: ${job.watchdog_target_label}`, {
      jobId: job.id, elapsedMin,
    });

  } else {
    result.summary = `Watchdog check command returned unexpected exit code ${exitCode}`;
    result.status = 'error';
    // Mirror the missing-command branch: an error status always carries a message.
    result.errorMessage = result.summary;
    log('warn', `Watchdog: unexpected exit code for ${job.watchdog_target_label}`, {
      jobId: job.id, exitCode, stderr: stderr.slice(0, 200),
    });
  }

  return result;
}
923
+
924
+ // -- Strategy: Main session ----------------------------------
925
+
926
/**
 * Main-session strategy.
 *
 * Dispatch mode is chosen from `job.execution_intent`:
 *   - anything other than 'fire-and-forget' (including missing): delegate to
 *     executeAgent synchronously, targeting the job's preferred session key or
 *     'main' when none is set. The agent's result is returned unchanged.
 *   - 'fire-and-forget': inject a system event and return immediately. No
 *     response is captured; when delivery is configured the prompt carries a
 *     reply-to note so the agent can send results itself via the message tool.
 *
 * Choose based on expected duration:
 *   Quick tasks (< 10s): sync is simpler and captures output
 *   Long tasks (> 30s):  fire-and-forget avoids blocking interactive chat
 *
 * @param {object} job  Job row; reads execution_intent, preferred_session_key,
 *                      payload_message, payload_thinking, delivery_* and name.
 * @param {object} ctx  Dispatch context; `ctx.run.id` is logged on the async path.
 * @param {object} deps Injected collaborators; the async path uses
 *                      sendSystemEvent, buildExecutionIntentNote and log.
 * @returns {Promise<object>} The executeAgent result (sync path) or a result
 *                      from makeDefaultResult() with delivery, children and
 *                      dequeue all skipped (fire-and-forget path).
 */
export async function executeMain(job, ctx, deps) {
  const isFireAndForget = job.execution_intent === 'fire-and-forget';

  if (!isFireAndForget) {
    // Sync path: reuse executeAgent with the main session key.
    // The job's preferred_session_key defaults to 'main' for main-session jobs.
    const originalSessionKey = job.preferred_session_key;
    job.preferred_session_key = originalSessionKey || 'main';
    try {
      return await executeAgent(job, ctx, deps);
    } finally {
      // Restore even when executeAgent throws, so the caller-owned job object
      // never keeps the temporary override.
      job.preferred_session_key = originalSessionKey;
    }
  }

  // Fire-and-forget path: inject system event, return immediately.
  const { sendSystemEvent, buildExecutionIntentNote, log } = deps;
  const result = makeDefaultResult();

  const executionNote = buildExecutionIntentNote(job);
  const modelNote = job.payload_thinking
    ? `[SYSTEM NOTE -- model policy]\nPrefer reasoning depth: ${job.payload_thinking}.\n[END SYSTEM NOTE]\n\n`
    : '';

  // Build the delivery reply-to instruction so the agent can send results
  // back through the scheduler post office when it finishes processing.
  let deliveryInstruction = '';
  if (job.delivery_mode && job.delivery_mode !== 'none' && job.delivery_channel && job.delivery_to) {
    deliveryInstruction = [
      '\n[SYSTEM NOTE -- delivery]',
      `When you have completed this task, send your results using the message tool.`,
      `Channel: ${job.delivery_channel}`,
      `Target: ${job.delivery_to}`,
      `Keep the message concise and actionable.`,
      `If there is nothing noteworthy to report, do not send a message.`,
      '[END SYSTEM NOTE]\n',
    ].join('\n');
  }

  const prompt = `${executionNote ? `${executionNote}\n\n` : ''}${modelNote}${deliveryInstruction}${job.payload_message}`;
  await sendSystemEvent(prompt, 'now');

  result.summary = 'System event dispatched (fire-and-forget)';
  result.content = job.payload_message;
  result.skipDelivery = true; // Agent handles delivery via message tool
  result.skipChildren = true;
  result.skipDequeue = true;

  log('info', `Dispatched (main/fire-and-forget): ${job.name}`, { runId: ctx.run.id });

  return result;
}
990
+
991
+ // -- Strategy: Shell -----------------------------------------
992
+
993
/**
 * Shell strategy: run `job.payload_message` as a shell command, normalize the
 * raw execution into a result, and decide whether anything should be delivered.
 *
 * Delivery rules:
 *   - 'announce-always': deliver on every outcome (failures get a warning prefix)
 *   - 'announce':        deliver only when the command did not finish 'ok'
 *   - anything else, or nothing to say: skip delivery
 *
 * @param {object} job  Job row; reads payload_message, run_timeout_ms, the
 *                      output_*_bytes limits, delivery_mode and name.
 * @param {object} ctx  Dispatch context; reads `run.id` and optional `materializedEnv`.
 * @param {object} deps Injected collaborators: runShellCommand, normalizeShellResult, log.
 * @returns {Promise<object>} Result from makeDefaultResult() populated with
 *                      status, summary, content and the shell_* run-finish fields.
 */
export async function executeShell(job, ctx, deps) {
  const { runShellCommand, normalizeShellResult, log } = deps;
  const result = makeDefaultResult();

  const rawExec = await runShellCommand(
    job.payload_message,
    job.run_timeout_ms,
    ctx.materializedEnv || null,
  );

  // Falsy limits are passed as undefined rather than as 0/null.
  const normalizeOptions = {
    runId: ctx.run.id,
    timeoutMs: job.run_timeout_ms,
    storeLimit: job.output_store_limit_bytes || undefined,
    excerptLimit: job.output_excerpt_limit_bytes || undefined,
    summaryLimit: job.output_summary_limit_bytes || undefined,
    offloadThreshold: job.output_offload_threshold_bytes || undefined,
  };
  const shell = normalizeShellResult(rawExec, normalizeOptions);

  result.status = shell.status;
  result.summary = shell.summary;
  result.errorMessage = shell.errorMessage;
  result.content = shell.deliveryText;
  result.runFinishFields = {
    context_summary: shell.contextSummary,
    shell_exit_code: shell.exitCode,
    shell_signal: shell.signal,
    shell_timed_out: shell.timedOut,
    shell_stdout: shell.stdout,
    shell_stderr: shell.stderr,
    shell_stdout_path: shell.stdoutPath,
    shell_stderr_path: shell.stderrPath,
    shell_stdout_bytes: shell.stdoutBytes,
    shell_stderr_bytes: shell.stderrBytes,
  };

  // Prefer the command's own output; fall back to the error message when the
  // output is blank.
  const hasOutput = Boolean(shell.deliveryText.trim());
  const announcePayload = hasOutput ? shell.deliveryText : shell.errorMessage;
  const failed = shell.status !== 'ok';
  const mode = job.delivery_mode;

  if (mode === 'announce-always' && announcePayload) {
    const prefix = failed ? `\u26a0\ufe0f Shell job failed: ${job.name}\n\n` : '';
    result.deliveryOverride = `${prefix}${announcePayload}`;
  } else if (mode === 'announce' && failed && announcePayload) {
    result.deliveryOverride = announcePayload;
  } else {
    result.skipDelivery = true;
  }

  log('info', `Shell ${shell.status}: ${job.name}`, {
    runId: ctx.run.id,
    exitCode: shell.exitCode,
    signal: shell.signal,
    timedOut: shell.timedOut,
  });

  return result;
}
1044
+
1045
+ // -- Strategy: Agent (isolated session) ----------------------
1046
+
1047
/**
 * Agent strategy: run one agent turn for the job and classify the reply.
 *
 * Steps, in order:
 *   1. Wait up to 30s for the gateway. If it stays down, finish the run as an
 *      error, release the idempotency key, push the dispatch (or the job's
 *      next_run_at) out by `sqliteNow(60000)` and return with earlyReturn set.
 *   2. Record the session key on the run and mark the agent busy.
 *   3. Build the prompt and store its context metadata (best effort).
 *   4. Resolve the auth profile, expanding 'inherit' from an active main session.
 *   5. Run the turn, then inspect the reply for sentinels / transient errors.
 *
 * @param {object} job  Job row; reads id, name, agent_id, preferred_session_key,
 *                      auth_profile, payload_model, payload_timeout_seconds,
 *                      run_timeout_ms and delivery_mode.
 * @param {object} ctx  Dispatch context; reads run, idemKey, dispatchRecord, v02Outcomes.
 * @param {object} deps Injected collaborators (gateway, run/job persistence,
 *                      prompt building, sentinel matching, logging).
 * @returns {Promise<object>} Result from makeDefaultResult(), populated with the
 *                      turn outcome or flagged earlyReturn when deferred.
 */
export async function executeAgent(job, ctx, deps) {
  const {
    waitForGateway,
    updateRunSession,
    setAgentStatus,
    buildJobPrompt,
    runAgentTurnWithActivityTimeout,
    updateContextSummary,
    releaseDispatch,
    releaseIdempotencyKey,
    updateJob,
    matchesSentinel,
    detectTransientError,
    listSessions,
    sqliteNow,
    log,
  } = deps;
  const result = makeDefaultResult();

  // Gateway health check
  const gatewayUp = await waitForGateway(30000, 2000);
  if (!gatewayUp) {
    log('warn', `Gateway unavailable after 30s -- deferring: ${job.name}`, { jobId: job.id });
    // The strategy does all bookkeeping itself for the gateway-down case.
    deps.finishRun(ctx.run.id, 'error', { error_message: 'Gateway unavailable -- deferred' });
    if (ctx.idemKey) releaseIdempotencyKey(ctx.idemKey);
    const retryAt = sqliteNow(60000);
    if (ctx.dispatchRecord) {
      releaseDispatch(ctx.dispatchRecord.id, retryAt);
    } else {
      updateJob(job.id, { next_run_at: retryAt });
    }
    result.earlyReturn = true;
    return result;
  }

  // The session key is stable per job (not per run), so subsequent runs reuse
  // the same session instead of bootstrapping a fresh one each dispatch.
  const sessionKey = job.preferred_session_key || `scheduler:${job.id}`;
  updateRunSession(ctx.run.id, sessionKey, null);

  // Mark agent as busy
  if (job.agent_id) {
    setAgentStatus(job.agent_id, 'busy', sessionKey);
  }

  // Build prompt and collect context metadata
  const built = buildJobPrompt(job, ctx.run);
  try {
    updateContextSummary(ctx.run.id, built.contextMeta);
  } catch (_e) {
    /* column may not exist yet */
  }

  // Auth profile precedence: effective profile from the child credential
  // policy, then the job's own profile, otherwise none.
  let authProfile = ctx.v02Outcomes?.effective_auth_profile || job.auth_profile || undefined;
  if (authProfile === 'inherit') {
    try {
      const listing = await listSessions({ kinds: ['main'], activeMinutes: 120, limit: 10 });
      // The listing may arrive in several envelope shapes; take the first match.
      const candidates = listing?.result?.details?.sessions
        || listing?.result?.sessions
        || listing?.sessions
        || listing
        || [];
      let mainSession = null;
      if (Array.isArray(candidates)) {
        mainSession = candidates.find((session) => {
          const key = session.key || session.sessionKey || '';
          return key.includes(':main:') || key.endsWith(':main') || key === 'main';
        });
      }
      const inherited = mainSession?.authProfileOverride || mainSession?.authProfile || mainSession?.profile;
      if (inherited) {
        authProfile = inherited;
        log('debug', `Resolved auth_profile 'inherit' -> '${inherited}'`, { jobId: job.id });
      } else {
        log('debug', `auth_profile 'inherit' -- no main session profile found, passing 'inherit' as-is`, { jobId: job.id });
      }
    } catch (err) {
      // Fall through with 'inherit' -- gateway may handle it
      log('warn', `Failed to resolve 'inherit' auth_profile: ${err.message}`, { jobId: job.id });
    }
  }

  const turnResult = await runAgentTurnWithActivityTimeout({
    message: built.prompt,
    agentId: job.agent_id || 'main',
    sessionKey,
    model: job.payload_model || undefined,
    authProfile,
    // materializedEnv deferred: the x-openclaw-env-inject header is not sent
    // until the OpenClaw gateway implements the receiver side. See
    // openclaw/docs/env-inject-proposal.md for the gateway spec.
    idleTimeoutMs: (job.payload_timeout_seconds || 120) * 1000,
    pollIntervalMs: 60000,
    absoluteTimeoutMs: job.run_timeout_ms || 300000,
  });

  const reply = turnResult.content || '';
  const replyTrimmed = reply.trim();

  const sentinel = {
    heartbeatOk: matchesSentinel(replyTrimmed, 'HEARTBEAT_OK'),
    noFlush: matchesSentinel(replyTrimmed, 'NO_FLUSH'),
    idempotentSkip: matchesSentinel(replyTrimmed, 'IDEMPOTENT_SKIP'),
    taskFailed: matchesSentinel(replyTrimmed, 'TASK_FAILED'),
  };
  const transient = detectTransientError(reply);

  if (sentinel.noFlush) log('info', `Flush: nothing to flush for ${job.name}`);
  if (sentinel.idempotentSkip) log('info', `Idempotent skip (agent): ${job.name}`);
  if (sentinel.taskFailed) log('warn', `Agent signalled TASK_FAILED: ${job.name}`, { runId: ctx.run.id });
  if (transient) {
    log('warn', `Transient error detected in agent reply: ${job.name}`, { runId: ctx.run.id, snippet: reply.slice(0, 200) });
  }

  const failed = Boolean(sentinel.taskFailed || transient);
  const effectiveStatus = failed ? 'error' : 'ok';

  let errorMessage = null;
  if (failed) {
    errorMessage = sentinel.taskFailed ? 'Agent signalled TASK_FAILED' : 'Transient error in agent reply';
  }

  result.status = effectiveStatus;
  result.summary = reply.slice(0, 5000);
  result.content = reply;
  result.errorMessage = errorMessage;
  result.idemAction = failed ? 'release' : 'keep';
  result.skipAgentCleanup = false;
  result.retryFiresChildren = true;

  // Delivery is suppressed for sentinel replies, and for successful runs in
  // 'announce' mode (announce only reports errors, matching shell jobs).
  const isQuietSentinel = sentinel.heartbeatOk || sentinel.noFlush || sentinel.idempotentSkip;
  const isQuietSuccess = job.delivery_mode === 'announce' && effectiveStatus === 'ok';
  if (isQuietSentinel || isQuietSuccess) {
    result.skipDelivery = true;
  }

  // started_at is normalized to an ISO string in UTC before parsing.
  let durationMs = null;
  if (ctx.run.started_at) {
    const startedRaw = ctx.run.started_at;
    const zoneSuffix = startedRaw.endsWith('Z') ? '' : 'Z';
    durationMs = Date.now() - new Date(startedRaw.replace(' ', 'T') + zoneSuffix).getTime();
  }

  log('info', `Completed: ${job.name} (${turnResult.usage?.total_tokens || '?'} tokens)`, {
    runId: ctx.run.id,
    durationMs,
  });

  return result;
}
1174
+
1175
+ // -- Strategy dispatcher with error-catch wrapper ------------
1176
+
1177
/**
 * Pick and run the strategy for a job, converting any thrown exception into
 * run/job/dispatch bookkeeping instead of letting it propagate.
 *
 * Strategy selection: watchdog jobs -> executeWatchdog; session_target 'main'
 * -> executeMain; 'shell' -> executeShell; everything else -> executeAgent.
 *
 * On exception:
 *   - Drain errors on isolated agent jobs finish the run, schedule at most one
 *     retry 90s out, and deliberately skip updateJobAfterRun so
 *     consecutive_errors is not incremented.
 *   - Otherwise the run is finished as an error and either a retry is
 *     scheduled or the failure is finalized (notify, fire children, dequeue,
 *     update job, mark dispatch done).
 *
 * Failure notifications are best effort: a delivery error is logged and never
 * interrupts the bookkeeping that follows it.
 *
 * @param {object} job  Job row.
 * @param {object} ctx  Dispatch context; reads run, idemKey and dispatchRecord.
 * @param {object} deps Injected collaborators.
 * @returns {Promise<object>} The strategy's result, or on exception a default
 *                      result with status 'error' and earlyReturn true.
 */
export async function executeStrategy(job, ctx, deps) {
  const { handleDelivery, log } = deps;
  try {
    if (job.job_type === 'watchdog') return await executeWatchdog(job, ctx, deps);
    if (job.session_target === 'main') return await executeMain(job, ctx, deps);
    if (job.session_target === 'shell') return await executeShell(job, ctx, deps);
    return await executeAgent(job, ctx, deps);
  } catch (err) {
    const {
      finishRun, releaseIdempotencyKey, setAgentStatus,
      isDrainError, enqueueDispatch, getJob, getDispatchBacklogCount,
      shouldRetry, scheduleRetry, getDb, updateJobAfterRun,
      setDispatchStatus, handleTriggeredChildren, dequeueJob,
      sqliteNow,
    } = deps;

    // Anything can be thrown, not just Error instances; always end up with a
    // usable message instead of undefined (or a crash on `null.message`).
    const errMessage = err?.message ?? String(err);

    log('error', `Failed: ${job.name}: ${errMessage}`, { jobId: job.id });

    // Best-effort failure notification for jobs that announce. A broken
    // delivery channel must not abort the cleanup that follows.
    const announces = job.delivery_mode === 'announce' || job.delivery_mode === 'announce-always';
    const notify = async (message) => {
      if (!announces) return;
      try {
        await handleDelivery(job, message);
      } catch (deliveryErr) {
        log('warn', `Failure notification could not be delivered for ${job.name}: ${deliveryErr?.message ?? deliveryErr}`, {
          jobId: job.id, runId: ctx.run.id,
        });
      }
    };

    // Terminal failure (no retry will happen): notify, fire children, dequeue,
    // record the error on the job and close the dispatch.
    const finalizeFailure = async (reason) => {
      await notify(`\u26a0\ufe0f Job failed: ${job.name}\n\n${errMessage}`);
      handleTriggeredChildren(job.id, 'error', errMessage, ctx.run.id, ` on ${reason}`);
      if (dequeueJob(job.id)) {
        log('info', `Dequeued pending dispatch for ${job.name} (after ${reason})`);
      }
      updateJobAfterRun(job, 'error');
      if (ctx.dispatchRecord) setDispatchStatus(ctx.dispatchRecord.id, 'done');
    };

    // -- Drain-error retry for isolated agentTurn jobs ----------
    // Gateway drain errors are transient infra noise -- the job never ran.
    // Don't increment consecutive_errors, and schedule a single retry after 90s.
    const isIsolatedAgent = job.session_target !== 'main' && job.session_target !== 'shell' && job.job_type !== 'watchdog';
    if (isIsolatedAgent && isDrainError(errMessage)) {
      finishRun(ctx.run.id, 'error', { error_message: errMessage });
      if (ctx.idemKey) releaseIdempotencyKey(ctx.idemKey);
      if (job.agent_id) setAgentStatus(job.agent_id, 'idle', null);

      // Check: max 1 drain retry per run, job must still be enabled, and respect overlap_policy:skip
      const freshJob = getJob(job.id);
      const canDrainRetry = freshJob && freshJob.enabled
        && (ctx.run.retry_count || 0) < 1
        && !(freshJob.overlap_policy === 'skip' && getDispatchBacklogCount(job.id) > 0);

      if (canDrainRetry) {
        const drainDispatch = enqueueDispatch(job.id, {
          kind: 'retry',
          scheduled_for: sqliteNow(90000),
          source_run_id: ctx.run.id,
          retry_of_run_id: ctx.run.id,
        });
        getDb().prepare('UPDATE runs SET retry_count = 1 WHERE id = ?').run(ctx.run.id);
        log('info', `[drain-retry] scheduling retry for ${job.name} in 90s (run ${ctx.run.id})`, {
          jobId: job.id, dispatchId: drainDispatch.id,
        });
      } else {
        log('info', `[drain-retry] skipping retry for ${job.name} (enabled=${freshJob?.enabled}, retry_count=${ctx.run.retry_count || 0}, overlap_backlog=${getDispatchBacklogCount(job.id)})`, {
          jobId: job.id, runId: ctx.run.id,
        });
      }

      // Do NOT call updateJobAfterRun -- avoid incrementing consecutive_errors for drain noise
      if (ctx.dispatchRecord) setDispatchStatus(ctx.dispatchRecord.id, 'done');
      return { ...makeDefaultResult(), status: 'error', earlyReturn: true };
    }

    finishRun(ctx.run.id, 'error', { error_message: errMessage });
    if (ctx.idemKey) releaseIdempotencyKey(ctx.idemKey);
    if (job.agent_id) setAgentStatus(job.agent_id, 'idle', null);

    if (shouldRetry(job, ctx.run.id)) {
      const retry = scheduleRetry(job, ctx.run.id);
      if (retry.dispatch) {
        log('info', `Scheduling retry ${retry.retryCount}/${job.max_retries} in ${retry.delaySec}s`, {
          jobId: job.id, runId: ctx.run.id,
        });
        await notify(`Job "${job.name}" failed with exception, retry ${retry.retryCount}/${job.max_retries} scheduled`);
        getDb().prepare('UPDATE runs SET retry_count = ? WHERE id = ?').run(retry.retryCount, ctx.run.id);
        if (ctx.dispatchRecord) setDispatchStatus(ctx.dispatchRecord.id, 'done');
        if (dequeueJob(job.id)) {
          log('info', `Dequeued pending dispatch for ${job.name} (after exception-retry)`);
        }
      } else {
        log('warn', `Retry skipped for ${job.name} -- dispatch backlog limit reached`, {
          jobId: job.id, runId: ctx.run.id,
          maxQueuedDispatches: job.max_queued_dispatches || 25,
        });
        await finalizeFailure('exception-retry-skipped');
      }
    } else {
      await finalizeFailure('failure');
    }

    return { ...makeDefaultResult(), status: 'error', earlyReturn: true };
  }
}